diff --git a/README.md b/README.md index f670a3f..115e0a0 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,6 @@ You can check the containers on DockerHub: https://hub.docker.com/r/kyuz0/amd-st | `rocm-6.4.2-rocwmma` | ROCm 6.4.2 (HIP) + ROCWMMA | ROCm with ROCWMMA enabled for improved flash attention on RDNA3+/CDNA. | | `rocm-6.4.3` | ROCm 6.4.3 (HIP) + hipBLASLt* | Latest stable ROCm. Great for BF16 models. Occasional crashes possible. | | `rocm-6.4.3-rocwmma` | ROCm 6.4.3 (HIP) + ROCWMMA + hipBLASLt* | ROCm with ROCWMMA enabled for improved flash attention on RDNA3+/CDNA. | -| `rocm-7beta` | ROCm 7.0 Beta (HIP) + hipBLASLt* | Latest ROCm beta. No real gain for Llama.cpp. Same model limits as 6.4.2. | | `rocm-7rc` | ROCm 7.0 RC (HIP) + hipBLASLt* | Release candidate for ROCm 7.0. Same behavior as beta. | | `rocm-7rc-rocwmma` | ROCm 7.0 RC (HIP) + ROCWMMA + hipBLASLt* | Release candidate for ROCm 7.0, with hipBLASLt and ROCWMMA for improved flash attention on RDNA3+/CDNA | diff --git a/benchmark/generate_results.json.py b/benchmark/generate_results.json.py index 4a0937a..c01279c 100644 --- a/benchmark/generate_results.json.py +++ b/benchmark/generate_results.json.py @@ -235,7 +235,7 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))): # Meta meta = { "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "os_kernel": "Fedora 42 — Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)", + "os_kernel": "Fedora 42 — Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [{"hash": h, "number": n} for (h, n) in sorted(builds)], "environments": sorted(envs), "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second", diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log index f7d6678..3e888bb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x19cb8050) reason :GPU Hang +Memory access fault by GPU node-1 (Agent handle: 0x275a2540) on address 0x7f3fb2c08000. Reason: Page not present or supervisor privilege. ✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index b800555..694fdbe 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.31 ± 0.13 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.97 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x25d19540) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log index 379088a..d19e880 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.07 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.48 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 131.14 ± 0.28 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.15 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log index 8df6842..e6a5d48 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x50e2050) reason :GPU Hang -✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 104.12 ± 0.05 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.35 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..7465f25 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.62 ± 0.10 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.95 ± 0.02 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 68% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 94079a7..f8e8b6b 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 103.63 ± 0.10 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.09 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 135.10 ± 0.35 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.14 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index e1a550e..fad8a13 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 129.88 ± 0.57 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.43 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 130.99 ± 0.36 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 68% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index d5b577e..d4132eb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.80 ± 0.14 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.13 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.15 ± 0.41 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.15 ± 0.01 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log new file mode 100644 index 0000000..b84b584 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.66 ± 0.22 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log new file mode 100644 index 0000000..350aa44 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 100.20 ± 0.13 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.30 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log similarity index 63% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log index d97d416..ec3889b 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2edd2a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x2624d340) reason :GPU Hang +✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..2d21418 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x37c5d340) on address 0x7f2e3516f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 00fe3c5..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.50 ± 0.25 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 20.02 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 305470f..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 130.22 ± 0.35 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.00 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 7cca3a8..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1f3f20c0) reason :GPU Hang -✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index a945543..d99dfb8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.16 ± 0.21 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.96 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.48 ± 0.53 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index d65e6e5..b74a931 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 133.91 ± 0.57 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 19.94 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.27 ± 0.47 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.86 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 10c8b0a..906c8ff 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.49 ± 0.48 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.95 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 158.54 ± 0.42 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 1d0ab1d..d51baac 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 138.34 ± 0.27 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.90 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 166.11 ± 0.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.83 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 9927f0b..3c47854 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.65 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.91 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 89.60 ± 0.20 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.22 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index 86f99ad..bf63f0d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.90 ± 0.22 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.15 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 64.66 ± 0.16 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.35 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index 441e956..a3b2c08 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.49 ± 0.14 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.88 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x1d380ea0) reason :GPU Hang +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 28b4354..9cc7929 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 103.73 ± 0.14 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.07 ± 0.00 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0x4a0fea0) on address 0x7f3bf796f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index 15c9127..d8fa4d5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 201.03 ± 0.31 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.82 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 197.95 ± 0.29 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.24 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index c0e6775..eece528 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.89 ± 0.37 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.85 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 199.40 ± 0.35 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.26 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index c38f2a1..5b8bc47 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 128.01 ± 0.31 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.92 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.28 ± 0.17 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.33 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 12bf239..41d8077 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.56 ± 0.31 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.64 ± 0.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.88 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log index deada9f..3ee3c3e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.75 ± 0.42 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.43 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x3e28b540) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log index 6e7bcaa..2b15919 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2d9b050) reason :GPU Hang +Memory access fault by GPU node-1 (Agent handle: 0x2bdf8540) on address 0x7f5f95e35000. Reason: Page not present or supervisor privilege. ✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log index 685d734..63bd38e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.94 ± 0.42 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.35 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x3ff2d540) reason :GPU Hang +✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log index b9a03bd..18f04dd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.41 ± 0.16 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.53 ± 0.01 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x3bb3540) reason :GPU Hang +✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log similarity index 54% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log index 5dc10c6..f5f0ad3 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 113.62 ± 0.21 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.04 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.82 ± 0.35 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.59 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log similarity index 69% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 1208365..2bfbda7 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 90.24 ± 0.13 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.55 ± 0.04 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.60 ± 0.30 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.62 ± 0.04 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log new file mode 100644 index 0000000..159477b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x26e36340) on address 0x7fcef3635000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f625092 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x35263340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log similarity index 54% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log index d1de7a1..8ac4440 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.82 ± 0.18 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.35 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.95 ± 0.30 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.65 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log similarity index 54% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log index 5ed10e0..581bc16 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1527fa90) on address 0x7f55d5f6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) +Memory access fault by GPU node-1 (Agent handle: 0x28aa3340) on address 0x7fb93761b000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log similarity index 58% rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log index d800da3..486bf8f 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3e596050) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x14d05340) reason :GPU Hang +✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log similarity index 57% rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log index c119ae0..fb08717 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2dab2050) reason :GPU Hang -✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x265e8340) reason :GPU Hang +✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log deleted file mode 100644 index d839f3e..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.61 ± 0.54 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.51 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log deleted file mode 100644 index 8a0ff5a..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 123.75 ± 0.39 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.48 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 0aab0d1..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x36bce0c0) on address 0x7f6ee1f6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index 55995b2..8ca5c1e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.92 ± 0.39 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.19 ± 0.20 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.64 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 161145c..6eaa1df 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 127.14 ± 0.27 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.47 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 114.61 ± 0.20 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.51 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 5e8d103..5fb6167 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.88 ± 0.92 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.61 ± 0.09 | + +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index d77ab42..ec552ad 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 150.07 ± 0.56 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.52 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log index 97c3a25..dc5e6ec 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.52 ± 0.35 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.52 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.52 ± 0.17 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.63 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log index c6194e3..c1980f8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 97.36 ± 0.07 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.57 ± 0.02 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 74.02 ± 0.13 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.73 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 7f9bb58..350f64d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.67 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.68 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 8e70bdf..6a1fdfd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x1c536ea0) on address 0x7f623b57e000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log index f5209b4..1a18e9c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.59 ± 0.50 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.51 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 219.81 ± 0.70 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.80 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 3ba2fc2..bc34d0a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 225.75 ± 0.69 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.53 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 222.20 ± 0.63 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 16.82 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log index c01b8a2..1c621cf 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.35 ± 0.43 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.80 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.55 ± 0.40 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.07 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index 3647c19..3dbeebf 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.91 ± 0.42 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 17.02 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.25 ± 0.50 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.31 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log deleted file mode 100644 index 49b6a40..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xae0b050) on address 0x7f17943a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 820c8ea..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 108.88 ± 0.21 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index ecdf26e..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1f7690e0) on address 0x7f6093d6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 9bbdd27..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2ae290c0) reason :GPU Hang -✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 3a354c6..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x19f880e0) reason :GPU Hang -✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log deleted file mode 100644 index 01916bd..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 109.02 ± 0.07 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 4d3f05b..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 117.34 ± 0.09 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 08bd375..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3d74d91..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 343727e..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 109.17 ± 0.12 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index d9b5fe6..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log deleted file mode 100644 index fac1830..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index f08d646..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 4581b23..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2491416576 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 8835330..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2491416576 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log deleted file mode 100644 index f0955ad..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.54 ± 0.14 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.67 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 0c2bb42..0000000 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.12 ± 0.08 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.67 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log index 5c9071b..4fb737a 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xd004050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x33b8a540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index 182cfd1..8ed4e21 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1fdc2050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x20e35540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log index 1e5d45d..8ad5ab6 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.28 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x1b1ea540) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log index db68588..7860063 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 30.88 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 16.16 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log similarity index 54% rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log index 409a36b..769f2f3 100644 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.32 ± 0.04 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.02 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 952dddb..4451e68 100644 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.28 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.83 ± 0.11 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log new file mode 100644 index 0000000..2553f31 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x21da1340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..1a88dcd --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x15ac2340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log new file mode 100644 index 0000000..56497ee --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.13 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log similarity index 62% rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log index cc4c84c..08a5922 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.69 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.62 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 80.42 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log new file mode 100644 index 0000000..f5b7147 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2c1e5340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..013fc7c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x3e536340) on address 0x7f9182f6f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 1275625..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.65 ± 0.23 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.74 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index d4fb01f..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2e9460f0) on address 0x7f23cf58a000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 256df75..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2c3170e0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 51e9900..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xe3f70e0) on address 0x7f4e23b6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 2d8ff98..62ab0f7 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.63 ± 0.19 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.31 ± 0.20 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 78ef763..1e31a60 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 103.15 ± 0.13 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 100.85 ± 0.13 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 8abfc94..c5612f6 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.00 ± 0.22 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index b85ea71..e557123 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.88 ± 0.09 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log index 7715818..7c6d5fd 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.15 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.74 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 99.41 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index a10eecc..f65845d 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -:0:rocdevice.cpp :3594: 448132897452 us: Callback: Queue 0x7f7ecc400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 +Memory access fault by GPU node-1 (Agent handle: 0x1f66bec0) on address 0x7f3e84b6f000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log index e557f4b..44b15c4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 94.06 ± 0.09 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index f3c72e9..970522b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 30.04 ± 0.04 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.00 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0xac09ec0) on address 0x7f283f56f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log index ac2d0df..13252f9 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.20 ± 0.18 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.75 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 98.03 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 9e22472..5781898 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.14 ± 0.35 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 99.12 ± 0.25 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log index b4da67c..9500e62 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 79.91 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.75 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 75.59 ± 0.28 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 5e3f60f..6c47ac0 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.40 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.75 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 80.09 ± 0.38 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log index b5a7155..6babfa8 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 134.21 ± 0.58 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.43 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x344ea540) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index 3a30aaf..2f3524f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x10997050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xe316540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log index f81dd87..1009e19 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 133.77 ± 0.46 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.30 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x17ade540) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log index b6d7516..c7625db 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1732e050) on address 0x7fcb1a36f000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0xe91f540) reason :GPU Hang ✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..ebcf552 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log @@ -0,0 +1,11 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +Memory access fault by GPU node-1 (Agent handle: 0x1a840340) on address 0x7f3babb56000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 68% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 6375788..ab25429 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 103.96 ± 0.18 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.47 ± 0.02 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.08 ± 1.26 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.53 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index f7132fb..65957d1 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 134.39 ± 0.32 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.33 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 134.19 ± 1.49 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.56 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..dcff3bd --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1de78340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log new file mode 100644 index 0000000..f75714d --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.28 ± 1.29 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.03 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log similarity index 53% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log index 53feea1..7c8dc76 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3b11ea90) reason :GPU Hang -✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) +Memory access fault by GPU node-1 (Agent handle: 0x2162b340) on address 0x7f500556f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log similarity index 56% rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log index d044208..96e7a94 100644 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x432ea90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0xdacf340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..65a2e33 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3dc00340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log deleted file mode 100644 index e2f4dbe..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x225860e0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 311e082..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -:0:rocdevice.cpp :3675: 454572762136 us: Callback: Queue 0x7fb3f1400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 7188df3..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x11dec0e0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log index be95461..64b52d5 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 273.64 ± 0.59 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 | - -build: 34c9d765 (6122) +HW Exception by GPU node-1 (Agent handle: 0x3882bf60) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 70811e5..11cbda1 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 293.87 ± 1.35 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 14.31 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.84 ± 9.41 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.37 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index fa50f09..7575a7e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.97 ± 1.67 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.05 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index e9eb9a3..647f737 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.26 ± 1.79 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.33 ± 0.03 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log index 07c550f..6ac1b55 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 269.30 ± 1.99 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.37 ± 1.65 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.04 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log index 0a2d01d..bc46574 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 225.70 ± 1.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 14.46 ± 0.00 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0xa893ec0) on address 0x7f070a3a9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log index c9103d5..ca30067 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.16 ± 0.44 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.41 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 269.17 ± 0.99 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.63 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log index 560fe07..56c35f6 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x1db86ec0) on address 0x7f2273f6f000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log index 3cc2007..8c8e292 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.54 ± 1.24 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.34 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 242.07 ± 1.05 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.56 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log index 7dafa9d..71556fe 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 246.48 ± 1.35 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.09 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 244.49 ± 1.13 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.33 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log index 80c940d..33a8b80 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 147.36 ± 0.80 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.30 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 147.08 ± 0.98 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.50 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log index e72dffe..caf7973 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 150.06 ± 1.13 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.27 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 149.97 ± 1.10 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.49 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log index 0910303..a8331aa 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.23 ± 0.81 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.62 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x1019d540) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log index 07a24ef..b68e909 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xf461050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x2ff5c540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log index db50520..88fb55c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.29 ± 0.58 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.60 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x3db80540) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log index 4d84455..befc174 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x13dd2050) on address 0x7f6913b6f000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x24a4c540) reason :GPU Hang ✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log similarity index 54% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log index 8678b7b..40cf34f 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.44 ± 0.76 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.61 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.35 ± 3.39 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.78 ± 0.03 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log similarity index 61% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 911ffe7..4a19b96 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 153.97 ± 1.90 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.23 ± 3.13 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.73 ± 0.03 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log similarity index 55% rename from benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index 1849a77..65afec2 100644 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x64dea90) reason :GPU Hang -✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x5f69340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 62% rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index bb5ac9d..98dc8c5 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 99 | 1 | 0 | pp512 | 33.87 ± 0.05 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 99 | 1 | 0 | tg128 | 2.64 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 140.27 ± 0.97 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.74 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log new file mode 100644 index 0000000..8ac0514 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log @@ -0,0 +1,11 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +HW Exception by GPU node-1 (Agent handle: 0x2079b340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log similarity index 53% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log index 099d9b2..8d6068b 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2fba3a90) reason :GPU Hang -✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) +Memory access fault by GPU node-1 (Agent handle: 0x37ff7340) on address 0x7fa76bba9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log new file mode 100644 index 0000000..2add86a --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2a344340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..977948c --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x27934340) on address 0x7f656656f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log deleted file mode 100644 index 1441b69..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 262.13 ± 9.71 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.65 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index 4dc7de6..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2b4130e0) on address 0x7f8a7ed6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log deleted file mode 100644 index 236d063..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x12790e0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 3db64ad..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x14e4a0e0) on address 0x7f859916f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log index 930340d..03112eb 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 267.45 ± 1.90 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.60 ± 0.05 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.13 ± 2.90 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.07 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 1ee598c..182bf9d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 293.37 ± 7.08 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 11.54 ± 0.03 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.60 ± 3.84 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.62 ± 0.02 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index b62035e..66c4a41 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.02 ± 2.74 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.06 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index f49c465..5439b6b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 279.69 ± 2.30 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.60 ± 0.04 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log index 8f33074..f3af1c9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 272.38 ± 1.28 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.64 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 252.38 ± 7.70 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.35 ± 0.60 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log index 3ccfa82..9932c99 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2e56aec0) on address 0x7f4102f6f000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log index 2758045..d31f283 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 271.54 ± 4.10 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.57 ± 0.58 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log index 0ab337b..986706b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x1aa83ec0) on address 0x7f9f1e96f000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log index 40cd552..089be05 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 255.55 ± 1.38 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.27 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 258.54 ± 1.39 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log index e8041dc..42f4672 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 259.07 ± 1.30 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.11 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 262.84 ± 1.39 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.30 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log index e52091e..e7df5fa 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.01 ± 0.85 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.30 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 169.23 ± 0.84 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log index 154221d..2776458 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.71 ± 0.91 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.28 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 173.79 ± 0.85 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log index f13eb82..90103a9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.82 ± 0.73 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.41 ± 0.00 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0x3e5ce540) on address 0x7f64d3b76000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index 5cd6f40..2e11ead 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1624d050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x1239e540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log index d0be2b8..b311bf5 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 137.63 ± 0.80 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.29 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x101f4540) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log index e7a2f72..8ac1834 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 122.98 ± 0.59 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.53 ± 0.00 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0x15f12540) on address 0x7ef17d976000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..f500cd5 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.51 ± 1.64 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 67% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index c46ae93..85f9bfb 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 233.14 ± 0.90 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.59 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 308.62 ± 2.62 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.54 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log similarity index 78% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 3ec496d..78e4255 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.27 ± 0.66 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.40 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.71 ± 0.62 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.71 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 68% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index bde171a..8945a72 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.58 ± 0.18 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.55 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 142.62 ± 0.82 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.55 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log new file mode 100644 index 0000000..5aa96bf --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.37 ± 1.44 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log similarity index 67% rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log index ee0d484..98b05eb 100644 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.61 ± 0.50 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.60 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 234.68 ± 1.31 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.71 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log similarity index 55% rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log index 2da2c5e..186cdf8 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xd98d050) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0xa3c7340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..19b75a4 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x118c6340) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index cc14e7d..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 281.87 ± 1.98 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.59 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 05da15f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2334b0e0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 20fd2ca..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1b1f20f0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 60b7302..54d6795 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 290.54 ± 1.59 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.67 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 4935293..6f81260 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 307.08 ± 2.67 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.34 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 304.99 ± 0.37 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.28 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index ac13496..721ffc5 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 138.22 ± 0.46 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.45 ± 0.09 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.93 ± 1.57 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.65 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 12bb02b..241fb21 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.13 ± 1.26 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.27 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 1e4897a..19895f7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 281.24 ± 1.95 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.56 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.60 ± 1.95 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.73 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index 9eb1c08..d607e30 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x134adec0) on address 0x7f0318984000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index ca4dda3..a0c7756 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.56 ± 1.41 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.72 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 9086eec..ae6829f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 227.75 ± 1.52 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.73 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index f03b1f2..f89d42c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.27 ± 0.80 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.09 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 216.64 ± 2.76 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.39 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 2706deb..7ee60d9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 220.73 ± 0.69 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.64 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 217.68 ± 4.15 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 19.97 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 8058f91..002154b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.77 ± 0.73 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.02 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 151.98 ± 0.60 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.26 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 953d42a..64d4625 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.24 ± 1.01 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.99 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 154.96 ± 0.82 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.28 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log index 24b7806..1adbae9 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3eeda050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x2f5d1540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log index c0f73f3..9a061f5 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2d723050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xdc93540) reason :GPU Hang ✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log index 95b0795..a1be58c 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 73.83 ± 0.16 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.68 ± 0.01 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0xff7540) reason :GPU Hang +✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log index 22c3d0b..281a126 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 61.47 ± 0.09 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.83 ± 0.00 | - -build: 79c1160b (6123) +HW Exception by GPU node-1 (Agent handle: 0x2607e540) reason :GPU Hang +✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log similarity index 54% rename from benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log index 1bc098e..d927ed6 100644 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 74.15 ± 0.18 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.73 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 130.11 ± 0.68 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.95 ± 0.04 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log new file mode 100644 index 0000000..4638f26 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.31 ± 0.80 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.71 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log similarity index 54% rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index a418f5b..d8218bd 100644 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xcd80a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x8063340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3a247e3 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x18398340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log new file mode 100644 index 0000000..82caf25 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.78 ± 1.03 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.68 ± 0.43 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log similarity index 53% rename from benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log index 40b3223..a0ed178 100644 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x25011a90) on address 0x7fdcc1b6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) +Memory access fault by GPU node-1 (Agent handle: 0x50aa340) on address 0x7f7365ba9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log new file mode 100644 index 0000000..3767019 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1990d340) reason :GPU Hang +✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..f1dc1f5 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x180d4340) on address 0x7f11c8f6f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log deleted file mode 100644 index c4bfe32..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x359cb0e0) reason :GPU Hang -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index 4707f93..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,7 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -:0:rocdevice.cpp :3675: 456558403486 us: Callback: Queue 0x7f04ef600000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 -Memory access fault by GPU node-1 (Agent handle: 0x2e8f0f0) on address 0x7eeca7f6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log deleted file mode 100644 index 4554363..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1c2260e0) reason :GPU Hang -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index d31397b..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x11f900f0) on address 0x7f6f91d6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index 440a82e..38826b0 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 129.70 ± 0.81 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.66 ± 0.00 | - -build: 34c9d765 (6122) +Memory access fault by GPU node-1 (Agent handle: 0x3c89ef80) on address 0x7f777640a000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 15a84cb..a74258c 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 145.18 ± 0.48 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.43 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.61 ± 0.92 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.34 ± 0.02 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 7597916..966ba3e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.33 ± 0.68 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.78 ± 0.04 | + +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index af8634b..22a4e47 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.47 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.03 ± 0.57 | + +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log index d8318fb..daaa296 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 130.56 ± 0.46 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.95 ± 0.76 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.99 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log index 3d86ec7..0d3acaf 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,6 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 97.08 ± 0.34 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.90 ± 0.03 | - -build: 79c1160b (6123) +:0:rocdevice.cpp :3675: 29915649820 us: Callback: Queue 0x7f500c700000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 +Memory access fault by GPU node-1 (Agent handle: 0x2aad9ec0) on address 0x7f37c576f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 3cb748b..8135bc1 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 135.29 ± 0.51 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.97 ± 0.04 | + +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 443bad3..fc4a37f 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x122f2ec0) on address 0x7f10537a9000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log index b14c784..ec978bb 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.76 ± 0.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.06 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 112.93 ± 0.63 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.43 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index c01f816..e560c0b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.18 ± 0.67 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.90 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 114.35 ± 1.12 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 16.27 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log index 077cf15..5fbf704 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.79 ± 0.39 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.61 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 64.60 ± 0.38 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.03 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index d9c6cb3..e878778 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.84 ± 0.42 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.86 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 66.60 ± 0.42 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.28 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log index 562318d..4f8da20 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.78 ± 2.71 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.56 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.75 ± 2.58 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.62 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index 77ad01b..598f905 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 161.64 ± 2.99 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.94 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 161.90 ± 3.05 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.09 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log index 9a6adc0..ae13cea 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.64 ± 2.49 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.93 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.81 ± 2.51 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.61 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log index 000d477..2791323 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 1.99 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.32 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 140.24 ± 1.86 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.46 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log similarity index 54% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log index 3e8cb4e..cd9f858 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 150.37 ± 1.75 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.49 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 438.42 ± 4.14 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.57 ± 0.01 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index af7dc3f..3adfafe 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 162.19 ± 3.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.03 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 475.43 ± 7.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.08 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 3c0cef6..96ee7c7 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.95 ± 2.63 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 158.13 ± 2.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index 86ac559..e330db0 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 2.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 163.40 ± 3.21 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.14 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log similarity index 54% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log index dc8823d..f983fbc 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log @@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.06 ± 1.71 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.13 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 441.36 ± 3.35 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.60 ± 0.01 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log index ded0220..04c9b9c 100644 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 154.09 ± 1.98 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.02 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 337.36 ± 3.48 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.45 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log index 03365ca..c1ff6cb 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.69 ± 2.52 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.89 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 161.73 ± 1.23 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 207a2a1..aaa3b75 100644 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 134.40 ± 1.47 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.32 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 143.05 ± 2.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.42 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 6b685b0..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 424.74 ± 7.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.48 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 2940f7a..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x16acc0c0) on address 0x7f24fed6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index b98ad69..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 154.45 ± 1.39 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.52 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 8773673..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 138.46 ± 1.64 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.29 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log index 5088e35..7f744f7 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 425.56 ± 3.28 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.80 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 448.63 ± 5.90 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.96 ± 0.02 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 20a8ebc..61f996e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 472.05 ± 4.59 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.12 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 473.34 ± 8.60 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.99 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 6fb55b7..8b322b7 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 153.54 ± 2.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.74 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 338.07 ± 3.03 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.93 ± 0.03 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index e0dd74e..a3675a1 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 158.20 ± 2.47 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.12 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 342.57 ± 3.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.97 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log index 1c911fe..c7f3224 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 426.72 ± 7.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.57 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 444.30 ± 6.78 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.66 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log index d2a18a1..8384bda 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x38fecea0) on address 0x7f31ea76f000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log index 25f3bf6..aab676c 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 153.89 ± 1.73 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.57 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 333.42 ± 6.83 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.69 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 53b2312..dba441c 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 137.06 ± 2.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.32 ± 0.01 | - -build: 79c1160b (6123) +Memory access fault by GPU node-1 (Agent handle: 0x1f121ea0) on address 0x7fd78e16f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log index fc1a60f..67aa5d3 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.55 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.09 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 106.47 ± 0.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.18 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index f84135e..de3ec24 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.68 ± 0.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 8.03 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 106.77 ± 0.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.11 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log index 1458372..7ea35f3 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 86.02 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.46 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 84.71 ± 0.11 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.52 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index ec2a50a..2aa8bc0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.93 ± 0.15 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.44 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 85.70 ± 0.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 7.52 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log index 015e9b4..14182c9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.45 ± 1.17 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.42 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.23 ± 0.82 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.64 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log index a5d6d7e..63e83fb 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 411.60 ± 0.78 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.14 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 411.72 ± 1.04 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.78 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log index 4565d4e..f33f7c4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 385.52 ± 0.67 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.06 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.86 ± 1.41 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.65 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log index a0064ea..928cc4b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 300.86 ± 0.38 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.71 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 301.23 ± 0.49 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.07 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..7499112 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 564.83 ± 6.58 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.68 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log index 60992bd..8947515 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 412.35 ± 1.06 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 48.26 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 624.99 ± 3.81 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.64 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log index dc6f1a9..1488828 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.77 ± 0.97 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 389.25 ± 2.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.66 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log index 2a04531..d3d972e 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 301.29 ± 0.54 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.58 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 412.18 ± 1.15 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.80 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log new file mode 100644 index 0000000..bb39f34 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 562.86 ± 10.14 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.74 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log similarity index 68% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log index bbdc595..e501cb6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 411.72 ± 2.56 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.76 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 418.07 ± 1.65 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.11 ± 0.01 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log index bd9bc1c..87b8aec 100644 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.72 ± 2.63 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.74 ± 1.70 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.65 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log similarity index 68% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log index 6a605f7..17416ee 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 300.58 ± 1.17 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 49.78 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 301.31 ± 0.65 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.37 ± 0.02 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log deleted file mode 100644 index 7b4fa67..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 534.84 ± 2.48 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.21 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log deleted file mode 100644 index f59b880..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.34 ± 1.49 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log index bd849a1..444ca09 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 535.44 ± 6.90 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.07 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 570.31 ± 5.05 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.52 ± 0.02 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log index 57f3363..c11bc62 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 619.02 ± 7.73 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 47.63 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 612.79 ± 4.77 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log index 922286f..e17b52a 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.98 ± 0.76 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.09 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 572.09 ± 8.22 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.45 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index a66f360..19e8321 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 413.28 ± 2.05 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 47.63 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 605.49 ± 1.47 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log index 625d68e..f2e3ece 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 540.14 ± 5.22 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.65 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 573.05 ± 6.77 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.80 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log index 43c8e66..3fa7435 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 418.60 ± 2.58 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.63 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 416.05 ± 3.44 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.33 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log index 74fbd99..db8de56 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 386.87 ± 1.67 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.50 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 576.38 ± 3.91 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.85 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log index 763fdaf..f19e470 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 300.40 ± 1.44 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 49.69 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 414.62 ± 3.23 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.22 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log index 4f467f9..10609cc 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 741.97 ± 2.92 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 57.22 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 733.40 ± 2.59 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 59.36 ± 0.05 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 8cbd25b..486113e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 731.64 ± 2.80 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 53.53 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 725.54 ± 2.84 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 55.57 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log index 993ae07..00cd713 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 396.38 ± 1.53 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 59.54 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 392.54 ± 1.80 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 61.56 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 296a137..3123ba1 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 406.84 ± 1.62 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 58.50 ± 0.10 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 403.74 ± 1.69 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 60.57 ± 0.08 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log deleted file mode 100644 index d63b9d0..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x168bc050) on address 0x7ef358d6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 4d0291a..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 408.29 ± 1.82 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 0830d17..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xf2660e0) on address 0x7fb2199a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index eb240a1..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.29 ± 1.91 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.53 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index d7843f5..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 130.39 ± 1.57 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.31 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log deleted file mode 100644 index 0a1bfa8..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 414.47 ± 3.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.61 ± 0.01 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 8c77605..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 460.12 ± 5.58 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.02 ± 0.01 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 6c331f6..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.43 ± 1.04 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.80 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index b9050d5..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 150.58 ± 1.93 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.13 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 04d0c86..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 413.05 ± 2.36 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.15 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index c7a5573..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 325.48 ± 1.77 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.31 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log deleted file mode 100644 index c900ebe..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.83 ± 2.39 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.12 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index a409195..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 130.20 ± 1.39 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.35 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 6c09327..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.16 ± 0.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.08 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 5f7c40c..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.26 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 8.04 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log deleted file mode 100644 index a65273a..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.88 ± 0.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.48 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index a14f281..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.57 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.49 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log index 4638587..a6af248 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 192.14 ± 0.71 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 10.75 ± 3.44 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.91 ± 0.21 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log index 2b45f78..333ac47 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 229.77 ± 0.18 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.58 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 229.15 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.76 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log index fe52481..f26f454 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.24 ± 0.39 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.59 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log index 62f3c9e..df5dd02 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 201.58 ± 0.09 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.57 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 197.89 ± 3.40 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.76 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..ba675a0 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.26 ± 0.94 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log index 348f5ed..a834fd9 100644 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 229.77 ± 0.32 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.59 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 820.41 ± 1.59 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log index 9f1e992..3b668bb 100644 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 223.38 ± 0.29 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 213.40 ± 3.62 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log index de6f8de..ed14086 100644 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 202.13 ± 0.24 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.58 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 224.20 ± 4.73 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log new file mode 100644 index 0000000..b6d34c8 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.70 ± 1.48 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log similarity index 69% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log index 214947a..a9a4641 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 567.65 ± 0.94 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.60 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 554.49 ± 0.62 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log index 5872035..21731f6 100644 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.86 ± 0.11 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.85 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 220.22 ± 1.60 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log similarity index 69% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log index 71ec3f9..ecb65ad 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 203.03 ± 0.17 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.58 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 193.90 ± 1.19 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log deleted file mode 100644 index c8ae56d..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 706.58 ± 0.96 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log deleted file mode 100644 index 305aaa3..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.31 ± 0.28 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.88 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log index 86106ee..f5c1c56 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 703.10 ± 0.68 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.83 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.04 ± 1.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.01 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index 3898840..c86174c 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 818.63 ± 0.82 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.47 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 811.04 ± 1.22 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log index 2e84f94..3f70b79 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.39 ± 0.17 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.99 ± 1.44 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.00 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index a31080a..4cc26aa 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 228.56 ± 0.31 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.51 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 794.90 ± 1.42 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log index b52b25c..301a2b5 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 706.92 ± 0.89 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.36 ± 0.48 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log index 228b25b..1ddb96e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 554.98 ± 0.46 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.61 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 569.66 ± 0.60 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log index 419821e..e22f092 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.26 ± 0.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.86 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 750.36 ± 1.88 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log index 8c0ebea..01fed10 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 201.53 ± 0.07 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.59 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 559.73 ± 0.51 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.79 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log index 670f9fa..6d1ed7b 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 675.90 ± 1.28 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 14.26 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 680.44 ± 0.55 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.39 ± 0.03 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index f680dfa..3c0a8e7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 371.03 ± 0.33 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.49 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 371.66 ± 0.51 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.62 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log index 36cc6ea..5a3ee90 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 504.61 ± 2.97 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 14.05 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 502.88 ± 1.45 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.21 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index df5009a..8f4867f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 495.37 ± 0.71 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 13.87 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 496.33 ± 1.83 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.02 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log index aab4706..0219357 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 92.82 ± 0.46 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.05 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 87.20 ± 3.70 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log index 43f28f1..8dcf6c9 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 94.62 ± 0.56 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 68.87 ± 14.37 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.08 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log index b7e4cd2..627bb9e 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 91.25 ± 0.44 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 82.57 ± 10.36 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log index da3d8bb..b35b468 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 84.81 ± 0.48 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 74.78 ± 10.12 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..523552b --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 395.28 ± 0.22 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.96 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index d3b262b..f8b41b5 100644 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.75 ± 0.35 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.37 ± 1.54 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.08 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 96d541d..1ce39b8 100644 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 92.52 ± 0.44 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.05 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 79.42 ± 0.41 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index a5e826d..29c9209 100644 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 94.54 ± 0.52 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 89.19 ± 0.53 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log new file mode 100644 index 0000000..fcaf5b3 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 398.35 ± 1.07 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log similarity index 69% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log index 6b1dd72..bb05e2f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 310.92 ± 0.73 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.05 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 302.82 ± 2.53 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log similarity index 69% rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log index aaaffba..f41ad73 100644 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 91.54 ± 0.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 59.13 ± 7.79 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log similarity index 69% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 846a5fd..c07bd16 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 82.85 ± 0.49 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 61.26 ± 10.54 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 6b535e0..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 405.35 ± 0.62 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log deleted file mode 100644 index 26890da..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 86.80 ± 0.36 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.02 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log index 8b094b5..d48d219 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 404.79 ± 0.61 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 418.46 ± 0.10 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index a690d20..ba0b9fa 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 472.91 ± 1.05 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.83 ± 1.65 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.07 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 6f151a6..f152fdb 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 91.08 ± 0.67 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.03 ± 0.01 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 454.10 ± 1.09 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 13775d4..a66ffac 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 93.26 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.43 ± 1.24 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log index 1615077..adb03dd 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 368.33 ± 0.38 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 3.71 ± 0.01 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 392.50 ± 0.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index 3803787..5ec86d3 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 311.83 ± 0.31 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 311.25 ± 0.72 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log index eba50a2..bec2363 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 80.07 ± 0.21 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.00 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 451.69 ± 0.62 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 4575c7f..3a00d5c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 324.43 ± 0.22 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log index d74242d..f20e9bc 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log @@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' ✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index a667917..2578dff 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' ✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log index 73c8358..9dfdc23 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | pp512 | 135.01 ± 0.28 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 129.49 ± 0.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 4.06 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 8c6f730..96c0e93 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | pp512 | 137.76 ± 0.25 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 137.67 ± 1.25 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log index 43d8ffa..84fba20 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 727.59 ± 1.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.22 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 728.70 ± 1.28 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.63 ± 0.03 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log index 5f4bc59..073d72d 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 750.30 ± 1.03 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.96 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 752.52 ± 0.83 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.93 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log index 8397b72..4ab8e49 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 728.24 ± 0.55 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 75.89 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.33 ± 1.93 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.79 ± 0.03 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log index ac89e51..bdf1afb 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 643.29 ± 0.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.53 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 645.25 ± 0.89 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.31 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..4a3544a --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2033.46 ± 5.16 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.47 ± 0.26 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log index 16dd036..78f4e0c 100644 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 752.25 ± 0.73 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.93 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2276.86 ± 9.60 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.26 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log index 34e7e86..54a23d2 100644 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 729.91 ± 1.22 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.14 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 727.18 ± 2.22 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.65 ± 0.74 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log index 08d39fe..24c6a23 100644 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 645.88 ± 0.61 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.63 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 740.27 ± 10.38 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.11 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log new file mode 100644 index 0000000..e669ba8 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2035.38 ± 4.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.40 ± 0.80 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log similarity index 69% rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log index 4c71363..1562460 100644 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 646.16 ± 0.39 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.53 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1515.55 ± 8.10 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.20 ± 0.39 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log index f07fba3..b49eeff 100644 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.51 ± 1.49 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.35 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 714.75 ± 27.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 66.10 ± 5.25 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log similarity index 68% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log index f67fdc0..f75c3de 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 1548.20 ± 4.48 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.64 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 596.86 ± 37.66 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 58.75 ± 3.09 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log deleted file mode 100644 index e24e049..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1812.73 ± 7.38 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.55 ± 0.02 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log deleted file mode 100644 index 2791d8d..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.03 ± 0.75 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.59 ± 0.03 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index e88558b..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 651.26 ± 1.22 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.44 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log index 502bbc1..c7f6ba3 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1799.45 ± 7.32 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 75.43 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2014.60 ± 24.35 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 59.16 ± 3.76 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log index c34e4a4..0862fc6 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 2267.56 ± 6.61 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 68.27 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2191.77 ± 78.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 54.32 ± 2.65 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log index d86865b..4293b33 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.58 ± 0.87 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.48 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1991.71 ± 2.91 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 56.37 ± 3.40 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log index 9ddd54a..1af2b23 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 750.44 ± 0.80 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 68.27 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2096.22 ± 4.59 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 64.88 ± 0.05 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log index b4f3d2a..7e86d5a 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1812.27 ± 4.63 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.22 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2027.41 ± 4.62 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.12 ± 0.03 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log index a78e906..a3497bf 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 1510.06 ± 4.96 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.58 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1550.55 ± 4.52 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.54 ± 0.06 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log index e4cd337..51f19f4 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.81 ± 1.15 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.03 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1992.48 ± 7.34 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.05 ± 0.03 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log index a600775..a65b575 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 645.48 ± 1.40 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.67 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1474.15 ± 1.44 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.44 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log index 8dafbf8..2624621 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1628.18 ± 1.73 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 84.23 ± 0.15 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1593.62 ± 2.90 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.26 ± 0.26 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index fc50285..20e82ce 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 947.36 ± 1.47 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 60.35 ± 0.15 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 936.52 ± 2.35 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 60.89 ± 0.10 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log index 2ecedbd..6ba35a8 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1529.98 ± 0.80 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 86.95 ± 0.31 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1515.05 ± 2.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 87.54 ± 0.18 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index 309d21b..a1a86a0 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 1498.81 ± 1.70 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 81.29 ± 0.12 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1476.16 ± 5.12 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 82.48 ± 0.36 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log index aa28166..d8257a4 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.66 ± 0.64 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.65 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.59 ± 0.86 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.97 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log index 11185b5..c765b22 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2ad71050) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 390.43 ± 0.70 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.81 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log index e00d36f..306797a 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 352.40 ± 1.12 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 31.99 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.94 ± 1.35 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.97 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log index 5f9c27c..a49785b 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 321.54 ± 0.46 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.03 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 322.57 ± 0.31 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.30 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..2b2057d --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 622.16 ± 6.71 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log index dbb739d..a8e0637 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 411.33 ± 1.01 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.50 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 743.09 ± 4.89 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.76 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log index 2301d16..d94bfc6 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 355.01 ± 0.57 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.66 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.98 ± 0.72 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.86 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 69% rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log index 707b558..95ce008 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 249.65 ± 0.33 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 390.67 ± 0.97 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log new file mode 100644 index 0000000..320bdde --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 617.00 ± 4.97 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.90 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log index a62923c..aaf9547 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.95 ± 0.40 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 543.39 ± 5.51 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.28 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log index fc1ded3..717bdb1 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.36 ± 0.53 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 31.90 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.18 ± 0.29 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log similarity index 69% rename from benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log index 5a91196..a328319 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 548.27 ± 2.65 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.07 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 322.46 ± 0.46 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.33 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log deleted file mode 100644 index 218c087..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 604.24 ± 4.34 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.69 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log deleted file mode 100644 index 957c7c2..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.23 ± 1.71 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.66 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index b665daa..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 323.79 ± 0.87 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.04 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log index f5db248..bfcbd06 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 592.27 ± 5.61 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.68 ± 0.02 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 643.61 ± 7.14 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log index 60e0975..9e50477 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 735.02 ± 5.32 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.34 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 736.33 ± 3.33 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.74 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log index 7e97f90..a3eb53e 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.49 ± 1.71 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.63 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 651.63 ± 3.08 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log index a07f108..422fcf2 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 388.50 ± 1.06 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.28 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 738.84 ± 9.12 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log index c05f1a3..8fc2f66 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 598.68 ± 9.32 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.75 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 649.28 ± 0.87 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.99 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log index f9a46a3..35770c3 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 546.30 ± 3.37 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.01 ± 3.85 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.38 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log index 19aa96b..7ed9087 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.34 ± 0.67 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.76 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 659.79 ± 3.13 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.01 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log index 2733c29..af54d9f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 324.26 ± 0.80 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.05 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 553.65 ± 2.40 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.31 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log index 461cbc2..9f2e80d 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 450.26 ± 1.46 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.56 ± 0.03 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.86 ± 1.68 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 34.19 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log index 2219116..9fa4616 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 499.80 ± 1.95 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.18 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 496.21 ± 1.71 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.64 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log index c45dee8..71ff53d 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 230.22 ± 0.76 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.16 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.09 ± 0.83 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.57 ± 0.02 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log index 718febd..547d915 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 243.20 ± 1.11 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.15 ± 0.02 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 243.96 ± 0.96 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log index c520d12..360ed4e 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.37 ± 0.72 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.11 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.20 ± 0.30 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.42 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log index 9dc5fe3..6969d6b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3c5a6050) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 387.10 ± 0.42 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.16 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log index 03da684..32eae28 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x8bc5050) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x2bea6540) reason :GPU Hang ✖ ! [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log index 97f0889..1d395f6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 319.23 ± 0.62 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.79 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 319.84 ± 0.73 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.43 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..f3ebd8d --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 606.86 ± 5.18 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.02 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 769bedc..78fca14 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 246.76 ± 0.35 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.67 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 732.72 ± 4.06 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.14 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index 3f432b9..0436056 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.53 ± 0.62 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.05 ± 0.08 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.42 ± 1.56 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.39 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 57% rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index 1848af6..4fe67c3 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1fec7050) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x3273c340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log new file mode 100644 index 0000000..f5b3307 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 608.20 ± 7.04 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.40 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log index fa4767b..7a9d128 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 408.50 ± 1.91 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.69 ± 0.18 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 533.95 ± 3.58 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.03 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log index c1f2f78..3f1f7ba 100644 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.45 ± 1.22 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 44.12 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 0.81 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.41 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log similarity index 68% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log index bbf6d17..fa1e33d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 539.93 ± 1.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.01 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 320.78 ± 0.96 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.49 ± 0.03 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log deleted file mode 100644 index 88c729a..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 589.45 ± 4.75 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.00 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log deleted file mode 100644 index 1b8a39f..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x261760b0) reason :GPU Hang -✖ ! [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 2a30ca3..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 323.04 ± 0.94 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.01 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log index 77cb354..17a14c7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 586.82 ± 5.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 44.72 ± 0.30 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 635.84 ± 5.72 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log index ae5c27f..8695248 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 684.17 ± 67.05 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.14 ± 0.27 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 708.36 ± 12.96 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index c0da5bb..117d484 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 350.89 ± 1.88 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 44.93 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.68 ± 9.08 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 4973b6a..173cfbe 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 734.35 ± 10.26 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log index 933469d..a5dcda7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 589.82 ± 5.12 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.12 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 646.07 ± 6.86 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.50 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log index 456548d..c8991e9 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 540.27 ± 2.82 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.89 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 3.26 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.31 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log index b3222aa..f2a91fb 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.60 ± 1.20 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.04 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 657.58 ± 3.78 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.56 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log index b82d07b..698f338 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 319.46 ± 0.48 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 43.90 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.79 ± 2.99 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log index 7d5b354..9d4e9e1 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 488.47 ± 2.30 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 48.21 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.54 ± 2.45 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 49.29 ± 0.03 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 3441dc1..c4cd434 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 547.53 ± 3.03 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 47.49 ± 0.08 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 540.81 ± 2.56 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 48.25 ± 0.03 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log index f6cbe94..acd5b3a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 239.44 ± 1.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 49.15 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.24 ± 1.27 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 50.39 ± 0.05 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 5538ed2..f04d91c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 255.37 ± 1.68 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 49.31 ± 0.08 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 255.50 ± 1.49 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.41 ± 0.04 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log index f7c6172..6698000 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.31 ± 4.50 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.87 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.30 ± 4.23 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.10 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log index 020f7b9..5f3dda7 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 343.30 ± 5.27 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.76 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 342.14 ± 4.83 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.05 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log index 88a7e15..5e1169a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 322.55 ± 4.18 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 24.90 ± 0.02 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.36 ± 4.35 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.12 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log index 41e5d8b..a1a5a05 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 304.86 ± 3.77 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.58 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 304.23 ± 3.73 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.85 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..45d2dea --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1198.51 ± 10.39 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log index 266806c..219d081 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 257.11 ± 2.63 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.47 ± 0.08 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.44 ± 7.03 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log index e1b0205..2c0a1a3 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 324.54 ± 4.39 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.87 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 326.80 ± 4.56 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.13 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log index 8e851e8..b04117d 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 380.87 ± 8.21 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.79 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 350.18 ± 5.10 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.09 ± 0.00 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log new file mode 100644 index 0000000..8d5aeeb --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +hipBLASLt error: Heuristic Fetch Failed! +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1185.57 ± 6.55 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.12 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log similarity index 69% rename from benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log index 3123235..4138942 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1011.32 ± 4.33 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.65 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1000.77 ± 2.37 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log index d9bd7eb..76ec711 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 323.86 ± 4.33 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.27 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 322.00 ± 4.37 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log similarity index 69% rename from benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log index 41d7fd7..9ce8ee4 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 301.30 ± 4.81 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.65 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 303.26 ± 4.84 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log deleted file mode 100644 index c97bff7..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1135.90 ± 9.10 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.88 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log deleted file mode 100644 index c01b9a9..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 313.05 ± 6.96 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.86 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log index 1912f8c..d80b5e6 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1130.14 ± 7.45 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.84 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1256.75 ± 10.54 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log index 5046a32..2648480 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1502.62 ± 12.84 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.67 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1481.17 ± 9.67 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.03 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log index c83d4a2..9c6df9d 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 319.92 ± 6.39 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.83 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1202.19 ± 5.53 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.10 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log index fe7a810..8b68b31 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 338.36 ± 5.02 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.71 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1422.90 ± 11.48 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log index 1c550a3..8a71c43 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1130.86 ± 14.88 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.89 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1253.01 ± 23.20 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log index a59b260..eaf7b98 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1007.82 ± 22.14 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.66 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1005.24 ± 32.45 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.89 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log index 84a8d46..685527d 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 321.80 ± 6.18 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.83 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.02 ± 12.30 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log index ec0c58d..18915d7 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 302.84 ± 5.01 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.61 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 985.58 ± 10.64 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.88 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log index 6875f68..381cc89 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 369.60 ± 1.30 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 8.72 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 367.61 ± 1.90 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.69 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log index b4f9322..4212c20 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 389.96 ± 1.87 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 8.70 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 386.12 ± 1.98 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 8.66 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log index ad6bfa4..a3f7dda 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 318.04 ± 1.50 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 7.89 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 315.56 ± 1.40 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.86 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log index 072c052..257f941 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 334.64 ± 1.46 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 7.90 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 333.31 ± 1.47 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.92 ± 0.01 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log index 59744c9..28cbd07 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 581.92 ± 2.00 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.34 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 582.60 ± 4.90 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.91 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log index 97f911b..985eaf7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 642.40 ± 3.59 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.74 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 644.05 ± 3.87 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.63 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log index 2bd619e..c8bf125 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 582.94 ± 2.35 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.35 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 581.11 ± 2.96 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.00 ± 0.02 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log index 8a01f71..320f480 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 522.14 ± 1.92 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.97 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 522.29 ± 2.36 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.63 ± 0.00 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..2c92204 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1184.03 ± 8.37 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.07 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log index cb2c45b..cd2e38f 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 649.48 ± 3.21 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 64.18 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1480.28 ± 9.38 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.45 ± 0.02 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log index c377132..f7e992e 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 580.83 ± 2.46 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.47 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.52 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.87 ± 0.02 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log similarity index 78% rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log index 34d817d..2016c11 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.67 ± 1.08 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.97 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 643.25 ± 3.86 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.67 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log new file mode 100644 index 0000000..d5473e4 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log @@ -0,0 +1,15 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +rocBLAS error: No hipBLASLt solution found +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. + +rocBLAS warning: hipBlasLT failed, falling back to tensile. +This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1171.02 ± 7.04 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.94 ± 0.04 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log similarity index 69% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log index 9c6c567..abd5fd2 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1005.66 ± 1.52 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.07 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 996.31 ± 6.53 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.68 ± 0.01 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log similarity index 79% rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log index 343b2b0..be6782a 100644 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 582.89 ± 2.32 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.45 ± 0.02 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 582.51 ± 2.41 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.89 ± 0.01 | -build: cd6983d5 (6119) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log similarity index 68% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log index 661d58f..90bc0d7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log @@ -1,10 +1,10 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 528.92 ± 2.02 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.00 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 522.63 ± 1.74 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.66 ± 0.03 | -build: 79c1160b (6123) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log deleted file mode 100644 index 7ccf6e0..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1128.54 ± 2.40 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.39 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log deleted file mode 100644 index 57a687a..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 585.03 ± 1.84 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.36 ± 0.01 | - -build: 79c1160b (6123) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log index f49d7ff..8e4bdde 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1124.54 ± 9.14 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.19 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1236.64 ± 11.20 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.78 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log index 82390bf..c87a02c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1474.70 ± 11.50 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.31 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1460.58 ± 11.92 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.26 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log index b1e54db..2dd9efb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 583.69 ± 2.09 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.26 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1299.34 ± 7.77 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.85 ± 0.00 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log index 3068d5b..fc2acae 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 642.92 ± 1.97 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.28 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1516.33 ± 21.51 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.40 ± 0.01 | -build: 34c9d765 (6122) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log index d848311..34dc4e8 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1125.60 ± 1.90 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.35 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1246.14 ± 8.32 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.15 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log index bd4d588..f6feec3 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 997.74 ± 8.16 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.00 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1010.38 ± 6.35 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.49 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log index 8b8a81a..425b7fd 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.02 ± 1.44 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.50 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1303.74 ± 6.94 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.10 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log index d0567df..0bd1151 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 525.48 ± 1.39 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.04 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1037.92 ± 11.67 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.63 ± 0.01 | -build: 79c1160b (6123) +build: de219279 (6181) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log index ec3e361..10fffaa 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 1218.18 ± 8.08 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 69.76 ± 0.07 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1220.69 ± 8.95 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 71.42 ± 0.20 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index fbda2c7..185b49c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 1482.59 ± 12.76 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 68.63 ± 0.11 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1467.61 ± 12.70 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 69.47 ± 0.09 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log index a2342cc..cf40790 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 649.86 ± 5.16 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 70.72 ± 0.04 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 651.21 ± 5.24 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 72.35 ± 0.08 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index d1051a1..ed9a28f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 728.71 ± 8.40 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 70.49 ± 0.04 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 732.35 ± 7.51 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.05 ± 0.07 | -build: 34c9d765 (6122) +build: 1fe00296 (6182) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log deleted file mode 100644 index 15e8d8f..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.76 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.48 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log deleted file mode 100644 index ab19f84..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.09 ± 0.10 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log deleted file mode 100644 index 9bd37ad..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 81.54 ± 0.11 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log deleted file mode 100644 index b92eb71..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2595b0b0) reason :GPU Hang -✖ ! [rocm7_beta] llama3.3-70.6B-Q4_K_M __hblt0 failed (exit 134) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log deleted file mode 100644 index 3fdb6c0..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 31.63 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 4.62 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log deleted file mode 100644 index 80974fd..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.41 ± 0.11 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 928b750..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 106.70 ± 0.12 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.60 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index d78ff89..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.87 ± 0.08 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.61 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 1757ab6..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 34.48 ± 0.05 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 4.61 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log deleted file mode 100644 index 5075f85..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.16 ± 0.09 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log deleted file mode 100644 index afc44e9..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 81.56 ± 0.09 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.62 ± 0.00 | - -build: 79c1160b (6123) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log deleted file mode 100644 index 9718332..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] llama3.3-70.6B-Q4_K_M __hblt0 failed (exit 134) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 9d5bb3c..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] llama3.3-70.6B-Q4_K_M __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log deleted file mode 100644 index f70a707..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 72.73 ± 0.05 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.08 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index d13dc99..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 73.47 ± 0.03 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.04 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log deleted file mode 100644 index 5ccc1cf..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 78.79 ± 0.21 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.04 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 375bd60..0000000 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 80.58 ± 0.13 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.03 ± 0.00 | - -build: 34c9d765 (6122) diff --git a/benchmark/results/run_benchmarks.log b/benchmark/results/run_benchmarks.log deleted file mode 100644 index b9e965c..0000000 --- a/benchmark/results/run_benchmarks.log +++ /dev/null @@ -1,1392 +0,0 @@ -Found 19 model(s) to bench: - • /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - • /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - • /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - • /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - • /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - • /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - • /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - • /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - • /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - • /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - • /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - • /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - • /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - • /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - • /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - • /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - • /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - • /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - • /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_rc-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm7_rc-rocwmma] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 : FAILED - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm7_rc] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm7_beta] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - * [rocm6_4_2-rocwmma] gpt-oss-120b-F16 : FAILED - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [vulkan_radv] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [vulkan_amdvlk] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm6_4_2] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm7_rc-rocwmma] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - * [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 : FAILED - -▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm7_rc] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm7_beta] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [vulkan_radv] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [vulkan_amdvlk] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm6_4_2] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm7_rc-rocwmma] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm7_rc] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm7_beta] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [vulkan_radv] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm6_4_2] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm7_rc-rocwmma] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_rc-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [rocm7_rc] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [rocm7_beta] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [rocm7_beta] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [vulkan_radv] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [vulkan_radv] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [rocm6_4_2] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf - - -▶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1 - → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1 - - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log deleted file mode 100644 index 268535b..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x834aa90) on address 0x7f10fb96f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 52deb8e..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x100d3790) reason :GPU Hang -✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 8039123..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x13829790) on address 0x7fa8ef9a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index fcf0f01..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.17 ± 0.38 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.83 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 4ef718e..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 200.76 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.78 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 4bbf6de..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.86 ± 0.27 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.83 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log deleted file mode 100644 index 90347e7..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.73 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.88 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index cf98168..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.54 ± 0.34 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index a0c808c..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2f508a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log deleted file mode 100644 index 273166e..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.54 ± 0.30 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index c23fe13..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2a849790) reason :GPU Hang -✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log deleted file mode 100644 index 5fbf5b3..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.18 ± 0.48 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 28ae734..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log deleted file mode 100644 index 4247170..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.02 ± 0.69 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.47 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index e3bc753..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 224.54 ± 0.65 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.49 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log deleted file mode 100644 index 5f0ace5..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.36 ± 0.46 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.78 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 1973a52..0000000 --- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.78 ± 0.46 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.99 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log deleted file mode 100644 index 135d108..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x121f0a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index 29b2095..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x17018a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log deleted file mode 100644 index 08dae7b..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x11442a90) reason :GPU Hang -✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index e01b520..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xa636790) reason :GPU Hang -✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 2f2342b..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1417b7b0) reason :GPU Hang -✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index c479337..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 33.30 ± 0.04 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.64 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 7b0ea20..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 31.09 ± 0.02 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 4581b23..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2491416576 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 8835330..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2491416576 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log deleted file mode 100644 index c6c72c5..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.70 ± 0.20 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.66 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index ea12120..0000000 --- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.29 ± 0.14 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.66 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index 3de552f..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1496da90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 2acc073..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xfeef7b0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 7a57ad3..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x6d017c0) on address 0x7f967f1a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index b9ba150..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index c55bab8..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index eb3efec..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.14 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 966e109..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.24 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.72 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log deleted file mode 100644 index 80c3a0e..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 80.11 ± 0.09 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 5826f3e..0000000 --- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.90 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log deleted file mode 100644 index 40f418b..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x28bb9a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index a94cdd6..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x194fea90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log deleted file mode 100644 index 6d3b4ea..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x17ad57b0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 107b01e..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2314b7b0) on address 0x7f38249a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log deleted file mode 100644 index ccf7ac1..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.25 ± 0.50 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 8df0b3e..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index dc80b9d..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.45 ± 1.29 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.29 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 08242f2..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 247.48 ± 1.28 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.03 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log deleted file mode 100644 index ba7a655..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 148.25 ± 0.91 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.21 ± 0.06 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 14f12dd..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 149.82 ± 0.83 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.21 ± 0.04 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log deleted file mode 100644 index 2faeaa3..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x9ae6a90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index 6ff4745..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x6e9ba90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log deleted file mode 100644 index c768b8e..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x4081f7b0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index 98c472e..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3c0f27b0) reason :GPU Hang -✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log deleted file mode 100644 index 9c06e2b..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 3ccfa82..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log deleted file mode 100644 index 3bdeae7..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 258.18 ± 1.38 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.23 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 2060565..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 260.16 ± 1.44 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.09 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log deleted file mode 100644 index d9b6ebc..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.63 ± 0.81 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.26 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 579e532..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.37 ± 0.92 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.25 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log deleted file mode 100644 index 070646e..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1a40fa90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index 3fa46c3..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2e0ffa90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log deleted file mode 100644 index 9d0c061..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3a741a90) reason :GPU Hang -✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log deleted file mode 100644 index fb93137..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.90 ± 0.66 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.62 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 2e1a6fc..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 75ac351..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.18 ± 0.83 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.04 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index a745a31..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 221.15 ± 0.74 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.58 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log deleted file mode 100644 index 4b78701..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.21 ± 0.66 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 19.98 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index ee535dc..0000000 --- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.22 ± 1.09 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.93 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log deleted file mode 100644 index aa6dfe3..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x153dfa90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index e2df164..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2bd2ba90) reason :GPU Hang -✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log deleted file mode 100644 index b5a6749..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x513c7b0) reason :GPU Hang -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index 7826050..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2567c7c0) on address 0x7ee66236f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log deleted file mode 100644 index dbd9c47..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 57b950a..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log deleted file mode 100644 index af5c138..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.49 ± 0.60 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 15.98 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 19e5e37..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.07 ± 0.64 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.84 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log deleted file mode 100644 index 2aefda4..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.85 ± 0.38 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.58 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index c0359f0..0000000 --- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.76 ± 0.43 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.83 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log deleted file mode 100644 index ea26bd0..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 153.49 ± 1.19 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.52 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index bb2103f..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 138.49 ± 2.52 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.35 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log deleted file mode 100644 index e446a9b..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 152.26 ± 2.41 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index d73c640..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 137.52 ± 1.75 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 1687c7e..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.48 ± 0.16 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.04 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index a9a752b..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.64 ± 0.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.96 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log deleted file mode 100644 index ccca043..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.97 ± 0.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.38 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 48148ef..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 87.05 ± 0.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log deleted file mode 100644 index a3987ef..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 390.07 ± 0.40 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log deleted file mode 100644 index a9ca9ef..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 300.60 ± 2.31 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.78 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log deleted file mode 100644 index 8ff09a7..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.99 ± 1.86 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log deleted file mode 100644 index db6f9b0..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 302.87 ± 0.88 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.90 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log deleted file mode 100644 index 51b45f0..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 736.95 ± 3.72 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 56.89 ± 0.26 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3f2a08e..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 727.71 ± 2.81 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 53.34 ± 0.31 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log deleted file mode 100644 index 5140ff3..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 395.16 ± 1.55 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 58.95 ± 0.45 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 6bbc4f2..0000000 --- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 405.61 ± 1.85 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 58.06 ± 0.28 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log deleted file mode 100644 index 6625574..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.50 ± 1.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log deleted file mode 100644 index 222959d..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.34 ± 1.74 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.14 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log deleted file mode 100644 index cc48f94..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 146.55 ± 1.77 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.54 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 285bed2..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2bd8a7b0) on address 0x7fe0b0d6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 29fa537..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 145.91 ± 1.76 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.57 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 1416318..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index 65ecb3e..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 106.99 ± 0.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.03 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 2b69233..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.10 ± 0.08 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.98 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log deleted file mode 100644 index 3a2d167..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.50 ± 0.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.42 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 9132fa2..0000000 --- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.52 ± 0.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log deleted file mode 100644 index 6493650..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.67 ± 0.37 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.88 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log deleted file mode 100644 index a535d64..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 203.12 ± 0.35 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.60 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log deleted file mode 100644 index f1ec100..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.49 ± 0.29 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log deleted file mode 100644 index f4493e0..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 201.47 ± 0.21 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.61 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log deleted file mode 100644 index 5ac352f..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 676.94 ± 0.85 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.99 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index b3193bd..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 371.17 ± 0.24 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.30 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log deleted file mode 100644 index b620676..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 503.27 ± 1.09 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.76 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 5e9431a..0000000 --- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 495.99 ± 2.36 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 13.61 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log deleted file mode 100644 index c646996..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x10c4a90) reason :GPU Hang -✖ ! [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log deleted file mode 100644 index 18449f7..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.61 ± 0.31 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 9e9f25c..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 55.68 ± 0.47 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 3.11 ± 0.98 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index f7ce012..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.08 ± 0.42 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log deleted file mode 100644 index d74242d..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index a667917..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log deleted file mode 100644 index 0c3a407..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | pp512 | 135.58 ± 0.45 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | tg128 | 4.00 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index f2077af..0000000 --- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | pp512 | 138.61 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log deleted file mode 100644 index ea76e52..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 732.13 ± 1.42 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.23 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log deleted file mode 100644 index 76e9619..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 652.29 ± 0.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.62 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log deleted file mode 100644 index ce94640..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.59 ± 1.69 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.01 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log deleted file mode 100644 index b707702..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1614.72 ± 4.91 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 84.00 ± 0.23 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log deleted file mode 100644 index 6055d96..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 942.34 ± 1.76 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 57.70 ± 0.22 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log deleted file mode 100644 index 5a56858..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1527.75 ± 3.86 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 85.54 ± 0.99 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log deleted file mode 100644 index ab5608b..0000000 --- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 1489.57 ± 4.71 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 80.63 ± 0.22 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log deleted file mode 100644 index 4e2c281..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 357.38 ± 0.76 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.62 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log deleted file mode 100644 index 63dd9d9..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 356.67 ± 0.74 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.68 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log deleted file mode 100644 index 8096c36..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.49 ± 0.65 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.07 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log deleted file mode 100644 index 755a9cf..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 448.17 ± 1.37 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.39 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log deleted file mode 100644 index 152170f..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 498.69 ± 2.19 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.06 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log deleted file mode 100644 index 5ab95e4..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 229.59 ± 0.74 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.08 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log deleted file mode 100644 index 9d830ae..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 243.40 ± 0.99 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.07 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log deleted file mode 100644 index 3892e39..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 354.82 ± 1.02 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.00 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log deleted file mode 100644 index 69476e2..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 248.22 ± 0.50 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.05 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log deleted file mode 100644 index 3a57ced..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.20 ± 0.59 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.15 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 93e7fca..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,5 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log deleted file mode 100644 index d229658..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 486.90 ± 2.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 48.08 ± 0.03 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index b556c96..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 546.41 ± 2.88 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 47.25 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log deleted file mode 100644 index 802c652..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 239.72 ± 1.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 49.01 ± 0.06 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 6b8a8c4..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 255.17 ± 1.65 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 48.93 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log deleted file mode 100644 index d76138e..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 322.43 ± 2.59 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.89 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log deleted file mode 100644 index 6dd4954..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.08 ± 3.99 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log deleted file mode 100644 index 67b820b..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 319.36 ± 3.07 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.88 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log deleted file mode 100644 index e07a069..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.87 ± 2.27 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log deleted file mode 100644 index 52536d1..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 369.69 ± 1.79 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 8.59 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log deleted file mode 100644 index 974e845..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 389.86 ± 2.13 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 8.58 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log deleted file mode 100644 index 7decf08..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 319.09 ± 1.46 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 7.79 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log deleted file mode 100644 index a9ce691..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 335.15 ± 1.80 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 7.79 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log deleted file mode 100644 index 441cec1..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 583.52 ± 2.76 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.39 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log deleted file mode 100644 index e5f1e99..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 396.75 ± 0.60 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.98 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log deleted file mode 100644 index 97fab79..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 581.83 ± 1.10 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.50 ± 0.02 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log deleted file mode 100644 index 3e34f41..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.87 ± 0.73 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.06 ± 0.01 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log deleted file mode 100644 index 2d4b788..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 1205.02 ± 7.18 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 68.84 ± 0.04 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log deleted file mode 100644 index 9a5c4c5..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 1472.56 ± 14.39 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 67.78 ± 0.18 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log deleted file mode 100644 index f400d0f..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 648.85 ± 6.28 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 69.88 ± 0.04 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log deleted file mode 100644 index 1959c7e..0000000 --- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 728.38 ± 8.17 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 69.80 ± 0.05 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log deleted file mode 100644 index e9da9da..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.47 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log deleted file mode 100644 index 0388774..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 34.51 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.61 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log deleted file mode 100644 index 01f32df..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.79 ± 0.03 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.52 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log deleted file mode 100644 index f9ae86b..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log deleted file mode 100644 index f6959d1..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.88 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log deleted file mode 100644 index 2869c45..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log deleted file mode 100644 index 6bd1b01..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.91 ± 0.03 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log deleted file mode 100644 index 77dd920..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.66 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log deleted file mode 100644 index bc604f8..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 72.75 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.03 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index 7ac44cb..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 73.57 ± 0.02 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.00 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log deleted file mode 100644 index 4cc5212..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 78.99 ± 0.18 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.00 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 869327e..0000000 --- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 80.92 ± 0.05 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 4.99 ± 0.00 | - -build: cd6983d5 (6119) diff --git a/benchmark/results_08-08-2025/run_benchmarks.log b/benchmark/results_08-08-2025/run_benchmarks.log deleted file mode 100644 index 073dde1..0000000 --- a/benchmark/results_08-08-2025/run_benchmarks.log +++ /dev/null @@ -1,1153 +0,0 @@ -Found 18 model(s) to bench: - • /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - • /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - • /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - • /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - • /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - • /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - • /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - • /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - • /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - • /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - • /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - • /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - • /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - • /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - • /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - • /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - • /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - • /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - * [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 : FAILED - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 __fa1 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL __fa1 - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 - - -▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf - - -▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S __fa1 - → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 - → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_rc] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm7_rc] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm7_beta] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [vulkan_radv] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [vulkan_amdvlk] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-120b-F16 - → log: results/gpt-oss-120b-F16__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf - - -▶ [rocm6_4_2] gpt-oss-120b-F16 __fa1 - → log: results/gpt-oss-120b-F16__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - * [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf - - -▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 - → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm7_rc] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm7_beta] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [vulkan_radv] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [vulkan_amdvlk] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-20b-F32 - → log: results/gpt-oss-20b-F32__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf - - -▶ [rocm6_4_2] gpt-oss-20b-F32 __fa1 - → log: results/gpt-oss-20b-F32__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 - - -▶ [rocm7_rc] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm7_rc] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm7_beta] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm7_beta] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [vulkan_radv] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [vulkan_radv] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm6_4_2] gpt-oss-20b-mxfp4 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf - - -▶ [rocm6_4_2] gpt-oss-20b-mxfp4 __fa1 - → log: results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 - - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M __fa1 - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 - - * [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - * [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - * [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED - -▶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 - diff --git a/benchmark/run_benchmarks.sh b/benchmark/run_benchmarks.sh index b4d15e7..151e1d1 100755 --- a/benchmark/run_benchmarks.sh +++ b/benchmark/run_benchmarks.sh @@ -28,7 +28,8 @@ echo declare -A CMDS=( [rocm6_4_2]="toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench" [rocm6_4_2-rocwmma]="toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench" - [rocm7_beta]="toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench" + [rocm6_4_3]="toolbox run -c llama-rocm-6.4.3 -- /usr/local/bin/llama-bench" + [rocm6_4_3-rocwmma]="toolbox run -c llama-rocm-6.4.3-rocwmma -- /usr/local/bin/llama-bench" [rocm7_rc]="toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench" [rocm7_rc-rocwmma]="toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench" [vulkan_amdvlk]="toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench" @@ -41,8 +42,8 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do for ENV in "${!CMDS[@]}"; do CMD="${CMDS[$ENV]}" - # For ROCm 7 envs, run default + HIPBLASLT=0 variants; others: default only - if [[ "$ENV" == rocm7_* ]]; then + # For ROCm 6.4.3 and 7 envs, run default + HIPBLASLT=0 variants; others: default only + if [[ "$ENV" == rocm7_* || "$ENV" == rocm6_4_3* ]]; then HBLT_MODES=( default off ) else HBLT_MODES=( default ) diff --git a/docs/index.html b/docs/index.html index b81fa7e..d8963c8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -363,6 +363,7 @@ Repo: kyuz0/amd-strix-halo-toolboxes

+

Platform: Framework Desktop, 128GB Unified RAM (accelerator-performance tuned profile)

Loading meta…

@@ -491,7 +492,7 @@
`; diff --git a/docs/results.json b/docs/results.json index 22c4563..d05c75a 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,22 +1,24 @@ { "meta": { - "generated_at": "2025-08-10T11:20:41Z", - "os_kernel": "Fedora 42 \u2014 Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)", + "generated_at": "2025-08-17T07:42:51Z", + "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" }, { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } ], "environments": [ "rocm6_4_2", "rocm6_4_2-rocwmma", - "rocm7_beta", - "rocm7_beta-hblt0", + "rocm6_4_3", + "rocm6_4_3-hblt0", + "rocm6_4_3-rocwmma", + "rocm6_4_3-rocwmma-hblt0", "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", @@ -38,7 +40,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -56,106 +58,6 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 139.31, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 19.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 130.07, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, "test": null, "tps_mean": null, "tps_std": null, @@ -168,118 +70,168 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 124.5, - "tps_std": 0.25, + "tps_mean": 131.14, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.02, + "tps_mean": 20.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 104.12, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.35, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 100.8, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 130.22, + "tps_mean": 126.62, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 19.95, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 135.1, "tps_std": 0.35, "error": false, "error_type": null, @@ -290,21 +242,171 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 20.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 130.99, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 20.0, + "tps_mean": 20.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 140.15, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 126.66, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -315,19 +417,69 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, "fa": true, + "test": "pp512", + "tps_mean": 100.2, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.3, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, "test": null, "tps_mean": null, "tps_std": null, @@ -340,7 +492,29 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", "build": null }, { @@ -351,12 +525,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 120.16, - "tps_std": 0.21, + "tps_mean": 117.48, + "tps_std": 0.53, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, @@ -364,8 +538,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -376,12 +550,12 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 19.96, - "tps_std": 0.01, + "tps_mean": 20.11, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, @@ -389,8 +563,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -401,12 +575,12 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 133.91, - "tps_std": 0.57, + "tps_mean": 126.27, + "tps_std": 0.47, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, @@ -414,8 +588,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -426,12 +600,12 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 19.94, + "tps_mean": 19.86, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, @@ -439,8 +613,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -451,507 +625,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 129.49, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 19.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 138.34, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 19.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 124.65, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 100.9, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 129.49, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 19.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 103.73, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 201.03, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 22.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 201.89, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 22.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 128.01, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 22.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 132.56, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 23.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 124.75, + "tps_mean": 158.54, "tps_std": 0.42, "error": false, "error_type": null, @@ -959,13 +633,432 @@ "ngl": 99, "mmap": 0, "params_b": 110.47, - "file_size_gib": 94.57, + "file_size_gib": 68.01, "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log", + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 166.11, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 19.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 89.6, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 64.66, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 197.95, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 23.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 199.4, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 23.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 126.28, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 23.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 131.64, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 23.88, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -975,23 +1068,20 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": false, - "test": "tg128", - "tps_mean": 15.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1004,7 +1094,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -1022,23 +1112,20 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 124.94, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1046,345 +1133,380 @@ "env": "rocm6_4_2", "env_base": "rocm6_4_2", "env_variant": null, + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 121.82, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 15.35, + "tps_mean": 15.59, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 100.41, - "tps_std": 0.16, + "tps_mean": 126.6, + "tps_std": 0.3, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 15.53, + "tps_mean": 15.62, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 117.95, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.65, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 118.61, - "tps_std": 0.54, + "tps_mean": 69.19, + "tps_std": 0.2, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": false, "test": "tg128", + "tps_mean": 15.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 114.61, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", "tps_mean": 15.51, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 90.24, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.55, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 123.75, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 118.92, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 15.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 127.14, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 15.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -1394,20 +1516,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 120.88, + "tps_std": 0.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.61, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1416,60 +1566,63 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, "test": "pp512", - "tps_mean": 118.52, - "tps_std": 0.35, + "tps_mean": 150.07, + "tps_std": 0.56, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma-hblt0", + "fa": true, "test": "tg128", "tps_mean": 15.52, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 69.52, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1477,8 +1630,33 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -1489,12 +1667,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 97.36, - "tps_std": 0.07, + "tps_mean": 74.02, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1502,8 +1680,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -1514,12 +1692,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.57, - "tps_std": 0.02, + "tps_mean": 15.73, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1527,8 +1705,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -1538,20 +1716,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 142.67, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1583,12 +1789,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 223.59, - "tps_std": 0.5, + "tps_mean": 219.81, + "tps_std": 0.7, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1596,8 +1802,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -1608,112 +1814,87 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 16.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 225.75, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 127.35, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", "tps_mean": 16.8, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 222.2, + "tps_std": 0.63, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 16.82, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 126.55, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1721,8 +1902,33 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -1733,12 +1939,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 131.91, - "tps_std": 0.42, + "tps_mean": 131.25, + "tps_std": 0.5, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1746,8 +1952,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -1758,12 +1964,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 17.02, + "tps_mean": 17.31, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, @@ -1771,644 +1977,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 33.87, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 2.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 108.88, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 109.02, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 2.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 117.34, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 2.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 109.17, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 78.54, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 81.12, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.71, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -2462,161 +2032,11 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 33.28, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 30.88, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 95.65, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -2624,14 +2044,308 @@ "file_size_gib": null, "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", "build": null }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 16.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 98.02, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 2.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 101.83, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 2.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 97.13, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 80.42, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": null, @@ -2646,14 +2360,14 @@ "file_size_gib": null, "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", "build": null }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": null, @@ -2668,7 +2382,7 @@ "file_size_gib": null, "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", "build": null }, { @@ -2679,12 +2393,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 95.63, - "tps_std": 0.19, + "tps_mean": 97.31, + "tps_std": 0.2, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2692,8 +2406,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -2704,12 +2418,12 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 2.73, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2717,8 +2431,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -2729,12 +2443,12 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 103.15, + "tps_mean": 100.85, "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2742,8 +2456,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -2754,12 +2468,12 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 2.73, + "tps_mean": 2.77, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2767,8 +2481,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -2778,20 +2492,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, + "test": "pp512", + "tps_mean": 93.0, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -2800,20 +2542,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, + "test": "pp512", + "tps_mean": 97.88, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -2823,12 +2593,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 95.15, - "tps_std": 0.14, + "tps_mean": 99.41, + "tps_std": 0.36, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2836,8 +2606,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -2848,12 +2618,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 2.74, + "tps_mean": 2.77, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2861,8 +2631,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -2894,6 +2664,56 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, + "test": "pp512", + "tps_mean": 94.06, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, "test": null, "tps_mean": null, "tps_std": null, @@ -2906,59 +2726,9 @@ "file_size_gib": null, "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": null }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 30.04, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -2967,12 +2737,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 98.2, - "tps_std": 0.18, + "tps_mean": 98.03, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -2980,8 +2750,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -2992,12 +2762,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 2.75, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3005,8 +2775,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3017,12 +2787,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 99.14, - "tps_std": 0.35, + "tps_mean": 99.12, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3030,8 +2800,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3042,12 +2812,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 2.74, + "tps_mean": 2.77, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3055,8 +2825,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3067,12 +2837,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 79.91, - "tps_std": 0.16, + "tps_mean": 75.59, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3080,8 +2850,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3092,12 +2862,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 2.75, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3105,8 +2875,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3117,12 +2887,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 82.4, - "tps_std": 0.16, + "tps_mean": 80.09, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3130,8 +2900,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3142,12 +2912,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 2.75, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, @@ -3155,8 +2925,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3166,48 +2936,20 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": false, - "test": "pp512", - "tps_mean": 134.21, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3238,48 +2980,20 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 133.77, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3292,7 +3006,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -3306,14 +3020,136 @@ { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": null, "tps_mean": null, "tps_std": null, "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 291.08, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 134.19, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.56, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, "error_type": "hang", "backend": null, "ngl": null, @@ -3322,14 +3158,64 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 270.28, + "tps_std": 1.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.58, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": null, @@ -3344,14 +3230,14 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": null, @@ -3366,19 +3252,63 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, "test": "pp512", - "tps_mean": 103.96, - "tps_std": 0.18, + "tps_mean": 285.84, + "tps_std": 9.41, "error": false, "error_type": null, "backend": "ROCm", @@ -3388,22 +3318,22 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 14.47, - "tps_std": 0.02, + "tps_mean": 14.37, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3413,110 +3343,10 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 273.64, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 293.87, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -3526,20 +3356,23 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 273.97, + "tps_std": 1.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3547,6 +3380,131 @@ "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.57, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 285.26, + "tps_std": 1.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.33, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 276.37, + "tps_std": 1.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.57, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, "fa": true, "test": null, "tps_mean": null, @@ -3560,109 +3518,9 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 269.3, - "tps_std": 1.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 225.7, - "tps_std": 1.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -3671,8 +3529,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 135.16, - "tps_std": 0.44, + "tps_mean": 269.17, + "tps_std": 0.99, "error": false, "error_type": null, "backend": "ROCm", @@ -3684,8 +3542,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -3696,8 +3554,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 14.41, - "tps_std": 0.0, + "tps_mean": 14.63, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -3709,8 +3567,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -3743,12 +3601,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 243.54, - "tps_std": 1.24, + "tps_mean": 242.07, + "tps_std": 1.05, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 82.35, @@ -3756,8 +3614,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3768,12 +3626,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.34, - "tps_std": 0.0, + "tps_mean": 15.56, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 82.35, @@ -3781,8 +3639,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3793,112 +3651,112 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 246.48, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 147.36, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 150.06, + "tps_mean": 244.49, "tps_std": 1.13, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 15.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 147.08, + "tps_std": 0.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.5, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 149.97, + "tps_std": 1.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 82.35, @@ -3906,8 +3764,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3918,12 +3776,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.27, + "tps_mean": 15.49, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 82.35, @@ -3931,8 +3789,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -3942,48 +3800,20 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": false, - "test": "pp512", - "tps_mean": 135.23, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 11.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -4014,48 +3844,20 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 135.29, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -4068,7 +3870,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -4082,58 +3884,202 @@ { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 262.13, - "tps_std": 9.71, + "tps_mean": 270.35, + "tps_std": 3.39, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 11.65, - "tps_std": 0.01, + "tps_mean": 11.78, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 292.23, + "tps_std": 3.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 11.73, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 140.27, + "tps_std": 0.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": null, @@ -4148,14 +4094,14 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": null, @@ -4170,14 +4116,14 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": null, @@ -4192,7 +4138,7 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log", "build": null }, { @@ -4203,12 +4149,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 267.45, - "tps_std": 1.9, + "tps_mean": 279.13, + "tps_std": 2.9, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4216,8 +4162,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -4228,12 +4174,12 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 11.6, - "tps_std": 0.05, + "tps_mean": 11.79, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4241,8 +4187,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -4253,12 +4199,12 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 293.37, - "tps_std": 7.08, + "tps_mean": 293.6, + "tps_std": 3.84, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4266,8 +4212,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -4278,12 +4224,12 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 11.54, - "tps_std": 0.03, + "tps_mean": 11.62, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4291,8 +4237,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -4302,20 +4248,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 264.02, + "tps_std": 2.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.79, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -4324,20 +4298,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 279.69, + "tps_std": 2.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.6, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -4347,12 +4349,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 272.38, - "tps_std": 1.28, + "tps_mean": 252.38, + "tps_std": 7.7, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4360,8 +4362,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -4372,12 +4374,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 11.64, - "tps_std": 0.01, + "tps_mean": 11.35, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4385,8 +4387,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -4418,20 +4420,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 271.54, + "tps_std": 4.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.57, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -4463,12 +4493,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 255.55, - "tps_std": 1.38, + "tps_mean": 258.54, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4476,8 +4506,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4488,12 +4518,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 12.27, + "tps_mean": 12.45, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4501,8 +4531,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4513,12 +4543,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 259.07, - "tps_std": 1.3, + "tps_mean": 262.84, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4526,8 +4556,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4538,12 +4568,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.11, + "tps_mean": 12.3, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4551,8 +4581,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4563,62 +4593,62 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 168.01, + "tps_mean": 169.23, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 12.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 173.79, "tps_std": 0.85, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 12.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 172.71, - "tps_std": 0.91, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4626,8 +4656,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4638,12 +4668,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.28, - "tps_std": 0.0, + "tps_mean": 12.44, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, @@ -4651,8 +4681,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -4662,48 +4692,20 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": false, - "test": "pp512", - "tps_mean": 137.82, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -4734,206 +4736,6 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 137.63, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 122.98, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 281.87, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 233.14, - "tps_std": 0.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, "test": null, "tps_mean": null, "tps_std": null, @@ -4946,42 +4748,20 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, "fa": true, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, "error_type": "runtime", "backend": null, "ngl": null, @@ -4990,32 +4770,376 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 285.51, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 17.7, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 308.62, + "tps_std": 2.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 137.71, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 142.62, + "tps_std": 0.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 286.37, + "tps_std": 1.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 234.68, + "tps_std": 1.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 17.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 290.54, + "tps_std": 1.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 307.08, - "tps_std": 2.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -5024,14 +5148,39 @@ "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", - "fa": true, + "fa": false, "test": "tg128", - "tps_mean": 17.34, + "tps_mean": 17.67, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 304.99, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5039,8 +5188,33 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -5051,8 +5225,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 138.22, - "tps_std": 0.46, + "tps_mean": 283.93, + "tps_std": 1.57, "error": false, "error_type": null, "backend": "ROCm", @@ -5064,8 +5238,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -5076,8 +5250,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 17.45, - "tps_std": 0.09, + "tps_mean": 17.65, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5089,8 +5263,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -5100,20 +5274,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 300.13, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -5123,12 +5325,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 281.24, + "tps_mean": 291.6, "tps_std": 1.95, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5136,8 +5338,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -5148,12 +5350,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.56, + "tps_mean": 17.73, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5161,8 +5363,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -5194,20 +5396,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 285.56, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -5216,20 +5446,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 227.75, + "tps_std": 1.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -5239,12 +5497,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 218.27, - "tps_std": 0.8, + "tps_mean": 216.64, + "tps_std": 2.76, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5252,8 +5510,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5264,12 +5522,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.09, - "tps_std": 0.01, + "tps_mean": 20.39, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5277,8 +5535,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5289,12 +5547,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 220.73, - "tps_std": 0.69, + "tps_mean": 217.68, + "tps_std": 4.15, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5302,8 +5560,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5314,62 +5572,37 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 19.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 152.77, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.02, + "tps_mean": 19.97, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 151.98, + "tps_std": 0.6, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5377,8 +5610,33 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.26, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -5389,12 +5647,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 155.24, - "tps_std": 1.01, + "tps_mean": 154.96, + "tps_std": 0.82, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5402,8 +5660,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5414,12 +5672,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 19.99, - "tps_std": 0.0, + "tps_mean": 20.28, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, @@ -5427,8 +5685,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5482,23 +5740,20 @@ "env_base": "rocm6_4_2", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 73.83, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5506,102 +5761,221 @@ "env": "rocm6_4_2", "env_base": "rocm6_4_2", "env_variant": null, + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 130.11, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 13.95, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 144.31, + "tps_std": 0.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 13.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 131.78, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, "fa": false, "test": "tg128", "tps_mean": 13.68, - "tps_std": 0.01, + "tps_std": 0.43, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 61.47, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": null, @@ -5616,14 +5990,14 @@ "file_size_gib": null, "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log", "build": null }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": null, @@ -5638,14 +6012,14 @@ "file_size_gib": null, "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", "build": null }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": null, @@ -5660,7 +6034,7 @@ "file_size_gib": null, "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", "build": null }, { @@ -5670,106 +6044,6 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": false, - "test": "pp512", - "tps_mean": 129.7, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 13.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 145.18, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, "test": null, "tps_mean": null, "tps_std": null, @@ -5782,70 +6056,173 @@ "file_size_gib": null, "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", "build": null }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, "test": "pp512", - "tps_mean": 130.56, - "tps_std": 0.46, + "tps_mean": 141.61, + "tps_std": 0.92, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma", + "fa": true, "test": "tg128", - "tps_mean": 13.87, + "tps_mean": 13.34, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 133.33, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 13.78, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 139.6, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.03, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 134.95, + "tps_std": 0.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -5853,8 +6230,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -5863,48 +6240,70 @@ "env": "rocm7_rc", "env_base": "rocm7_rc", "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 97.08, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, + "fa": false, "test": "tg128", - "tps_mean": 13.9, - "tps_std": 0.03, + "tps_mean": 13.99, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 135.29, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -5914,20 +6313,23 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, + "test": "tg128", + "tps_mean": 13.97, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5959,12 +6361,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 114.76, - "tps_std": 0.62, + "tps_mean": 112.93, + "tps_std": 0.63, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -5972,8 +6374,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -5984,62 +6386,37 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 16.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 116.18, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.9, + "tps_mean": 16.43, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 114.35, + "tps_std": 1.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -6047,8 +6424,33 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 16.27, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -6059,12 +6461,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 64.79, - "tps_std": 0.39, + "tps_mean": 64.6, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -6072,8 +6474,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6084,12 +6486,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 16.61, - "tps_std": 0.0, + "tps_mean": 17.03, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -6097,8 +6499,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6109,12 +6511,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 66.84, + "tps_mean": 66.6, "tps_std": 0.42, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -6122,8 +6524,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6134,12 +6536,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 16.86, + "tps_mean": 17.28, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, @@ -6147,8 +6549,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6159,8 +6561,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 157.78, - "tps_std": 2.71, + "tps_mean": 157.75, + "tps_std": 2.58, "error": false, "error_type": null, "backend": "ROCm", @@ -6172,8 +6574,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6184,7 +6586,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 24.56, + "tps_mean": 24.62, "tps_std": 0.0, "error": false, "error_type": null, @@ -6197,8 +6599,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6209,8 +6611,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 161.64, - "tps_std": 2.99, + "tps_mean": 161.9, + "tps_std": 3.05, "error": false, "error_type": null, "backend": "ROCm", @@ -6222,8 +6624,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6234,8 +6636,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 23.94, - "tps_std": 0.0, + "tps_mean": 24.09, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -6247,8 +6649,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6259,12 +6661,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 157.64, - "tps_std": 2.49, + "tps_mean": 157.81, + "tps_std": 2.51, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6272,8 +6674,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6284,12 +6686,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 23.93, + "tps_mean": 24.61, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6297,8 +6699,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6309,12 +6711,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 140.32, - "tps_std": 1.99, + "tps_mean": 140.24, + "tps_std": 1.86, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6322,8 +6724,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6334,12 +6736,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 24.32, - "tps_std": 0.0, + "tps_mean": 24.46, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6347,421 +6749,624 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 424.74, - "tps_std": 7.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 154.45, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 138.46, - "tps_std": 1.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 425.56, - "tps_std": 3.28, + "tps_mean": 438.42, + "tps_std": 4.14, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 24.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 472.05, - "tps_std": 4.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 153.54, - "tps_std": 2.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 158.2, - "tps_std": 2.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 426.72, - "tps_std": 7.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", "tps_mean": 24.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 475.43, + "tps_std": 7.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 24.08, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 158.13, + "tps_std": 2.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 24.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 163.4, + "tps_std": 3.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 24.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 441.36, + "tps_std": 3.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 24.6, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 337.36, + "tps_std": 3.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 24.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 161.73, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 24.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 143.05, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 24.42, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 448.63, + "tps_std": 5.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 24.96, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 473.34, + "tps_std": 8.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 23.99, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 338.07, + "tps_std": 3.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 24.93, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 342.57, + "tps_std": 3.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 23.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 444.3, + "tps_std": 6.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6769,8 +7374,33 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 24.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -6803,8 +7433,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 153.89, - "tps_std": 1.73, + "tps_mean": 333.42, + "tps_std": 6.83, "error": false, "error_type": null, "backend": "ROCm", @@ -6816,8 +7446,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6828,7 +7458,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 24.57, + "tps_mean": 24.69, "tps_std": 0.0, "error": false, "error_type": null, @@ -6841,8 +7471,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -6852,48 +7482,20 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": "pp512", - "tps_mean": 137.06, - "tps_std": 2.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 30.0, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.32, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } + "build": null }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", @@ -6903,12 +7505,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 107.55, - "tps_std": 0.11, + "tps_mean": 106.47, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6916,8 +7518,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6928,12 +7530,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 8.09, + "tps_mean": 8.18, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6941,8 +7543,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6953,12 +7555,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 107.68, - "tps_std": 0.13, + "tps_mean": 106.77, + "tps_std": 0.12, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6966,8 +7568,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -6978,12 +7580,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 8.03, + "tps_mean": 8.11, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -6991,8 +7593,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -7003,12 +7605,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 86.02, + "tps_mean": 84.71, "tps_std": 0.11, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -7016,8 +7618,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -7028,12 +7630,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 7.46, + "tps_mean": 7.52, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -7041,8 +7643,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -7053,12 +7655,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 86.93, - "tps_std": 0.15, + "tps_mean": 85.7, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -7066,8 +7668,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -7078,12 +7680,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 7.44, + "tps_mean": 7.52, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 56.89, @@ -7091,8 +7693,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -7103,8 +7705,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 387.45, - "tps_std": 1.17, + "tps_mean": 387.23, + "tps_std": 0.82, "error": false, "error_type": null, "backend": "ROCm", @@ -7116,8 +7718,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -7128,7 +7730,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 50.42, + "tps_mean": 50.64, "tps_std": 0.01, "error": false, "error_type": null, @@ -7141,8 +7743,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -7153,258 +7755,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 411.6, - "tps_std": 0.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 48.14, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 385.52, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 300.86, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.71, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 534.84, - "tps_std": 2.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.21, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", "tps_mean": 411.72, - "tps_std": 2.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 387.34, - "tps_std": 1.49, + "tps_std": 1.04, "error": false, "error_type": null, "backend": "ROCm", @@ -7414,71 +7766,21 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.23, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 300.58, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 49.78, + "tps_mean": 48.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -7489,242 +7791,42 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 535.44, - "tps_std": 6.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 619.02, - "tps_std": 7.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 47.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 387.98, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 413.28, - "tps_std": 2.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 47.63, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 540.14, - "tps_std": 5.22, + "tps_mean": 387.86, + "tps_std": 1.41, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", "env_variant": null, "fa": false, "test": "tg128", @@ -7733,7 +7835,682 @@ "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 301.23, + "tps_std": 0.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 50.07, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 564.83, + "tps_std": 6.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 50.68, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 624.99, + "tps_std": 3.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 48.64, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 389.25, + "tps_std": 2.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.66, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 412.18, + "tps_std": 1.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 48.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 562.86, + "tps_std": 10.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 50.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 418.07, + "tps_std": 1.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 50.11, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 387.74, + "tps_std": 1.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.65, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 301.31, + "tps_std": 0.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 50.37, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 570.31, + "tps_std": 5.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 50.52, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 612.79, + "tps_std": 4.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 46.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 572.09, + "tps_std": 8.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.45, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 605.49, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 573.05, + "tps_std": 6.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 24.53, @@ -7741,8 +8518,33 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 50.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -7753,1340 +8555,396 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 418.6, - "tps_std": 2.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 386.87, - "tps_std": 1.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.5, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 300.4, - "tps_std": 1.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 741.97, - "tps_std": 2.92, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 57.22, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 731.64, - "tps_std": 2.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 53.53, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 396.38, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 59.54, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 406.84, - "tps_std": 1.62, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 58.5, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 150.37, - "tps_std": 1.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 24.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 153.97, - "tps_std": 1.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 23.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 150.06, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 23.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 408.29, - "tps_std": 1.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 145.29, - "tps_std": 1.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 130.39, - "tps_std": 1.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 414.47, - "tps_std": 3.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 24.61, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 460.12, - "tps_std": 5.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.02, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 145.43, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 150.58, - "tps_std": 1.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 413.05, - "tps_std": 2.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 325.48, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 145.83, - "tps_std": 2.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 130.2, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 107.16, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 8.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 107.26, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 8.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 85.88, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 7.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 86.57, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 192.14, - "tps_std": 0.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 10.75, + "tps_mean": 416.05, "tps_std": 3.44, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 50.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 576.38, + "tps_std": 3.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 414.62, + "tps_std": 3.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 50.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 733.4, + "tps_std": 2.59, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 59.36, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 725.54, + "tps_std": 2.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 55.57, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 392.54, + "tps_std": 1.8, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 61.56, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 403.74, + "tps_std": 1.69, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 60.57, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 222.91, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -9097,8 +8955,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 229.77, - "tps_std": 0.18, + "tps_mean": 229.15, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "ROCm", @@ -9110,8 +8968,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9122,7 +8980,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 13.58, + "tps_mean": 13.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -9135,8 +8993,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9147,12 +9005,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 222.24, - "tps_std": 0.39, + "tps_mean": 222.59, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9160,8 +9018,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9172,12 +9030,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 13.86, + "tps_mean": 14.03, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9185,8 +9043,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9197,12 +9055,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 201.58, - "tps_std": 0.09, + "tps_mean": 197.89, + "tps_std": 3.4, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9210,8 +9068,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9222,12 +9080,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 13.57, + "tps_mean": 13.76, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9235,120 +9093,320 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 706.58, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 567.65, + "tps_mean": 734.26, "tps_std": 0.94, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 820.41, + "tps_std": 1.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 13.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 213.4, + "tps_std": 3.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 224.2, + "tps_std": 4.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 734.7, + "tps_std": 1.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 554.49, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 13.6, + "tps_mean": 13.78, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 222.31, - "tps_std": 0.28, + "tps_mean": 220.22, + "tps_std": 1.6, "error": false, "error_type": null, "backend": "ROCm", @@ -9358,21 +9416,21 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 13.88, + "tps_mean": 14.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -9383,22 +9441,22 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 203.03, - "tps_std": 0.17, + "tps_mean": 193.9, + "tps_std": 1.19, "error": false, "error_type": null, "backend": "ROCm", @@ -9408,21 +9466,21 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 13.58, + "tps_mean": 13.77, "tps_std": 0.0, "error": false, "error_type": null, @@ -9433,10 +9491,10 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { @@ -9447,12 +9505,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 703.1, - "tps_std": 0.68, + "tps_mean": 751.04, + "tps_std": 1.24, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9460,8 +9518,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9472,12 +9530,12 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 13.83, + "tps_mean": 14.01, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9485,8 +9543,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9497,12 +9555,12 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 818.63, - "tps_std": 0.82, + "tps_mean": 811.04, + "tps_std": 1.22, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9510,8 +9568,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9522,12 +9580,12 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 13.47, + "tps_mean": 13.45, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9535,8 +9593,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9547,8 +9605,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 222.39, - "tps_std": 0.17, + "tps_mean": 752.99, + "tps_std": 1.44, "error": false, "error_type": null, "backend": "ROCm", @@ -9560,8 +9618,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9572,7 +9630,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 13.81, + "tps_mean": 14.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -9585,8 +9643,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9597,8 +9655,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 228.56, - "tps_std": 0.31, + "tps_mean": 794.9, + "tps_std": 1.42, "error": false, "error_type": null, "backend": "ROCm", @@ -9610,8 +9668,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9622,7 +9680,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 13.51, + "tps_mean": 13.45, "tps_std": 0.0, "error": false, "error_type": null, @@ -9635,8 +9693,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -9647,12 +9705,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 706.92, - "tps_std": 0.89, + "tps_mean": 752.36, + "tps_std": 0.48, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9660,8 +9718,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -9672,312 +9730,287 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 13.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 554.98, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 222.26, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 13.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 201.53, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 675.9, - "tps_std": 1.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 371.03, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 12.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 504.61, - "tps_std": 2.97, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", "tps_mean": 14.05, "tps_std": 0.0, "error": false, "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 569.66, + "tps_std": 0.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 13.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 750.36, + "tps_std": 1.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 559.73, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 680.44, + "tps_std": 0.55, + "error": false, + "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.39, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 371.66, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 12.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 502.88, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -9985,8 +10018,33 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -9997,12 +10055,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 495.37, - "tps_std": 0.71, + "tps_mean": 496.33, + "tps_std": 1.83, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -10010,8 +10068,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10022,12 +10080,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 13.87, + "tps_mean": 14.02, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, @@ -10035,8 +10093,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10047,8 +10105,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 92.82, - "tps_std": 0.46, + "tps_mean": 87.2, + "tps_std": 3.7, "error": false, "error_type": null, "backend": "ROCm", @@ -10060,8 +10118,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10072,7 +10130,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 4.05, + "tps_mean": 4.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -10085,8 +10143,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10097,8 +10155,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 94.62, - "tps_std": 0.56, + "tps_mean": 68.87, + "tps_std": 14.37, "error": false, "error_type": null, "backend": "ROCm", @@ -10110,8 +10168,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10122,7 +10180,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 4.03, + "tps_mean": 4.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -10135,8 +10193,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10147,12 +10205,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 91.25, - "tps_std": 0.44, + "tps_mean": 82.57, + "tps_std": 10.36, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10160,8 +10218,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10172,12 +10230,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.09, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10185,8 +10243,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10197,12 +10255,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 84.81, - "tps_std": 0.48, + "tps_mean": 74.78, + "tps_std": 10.12, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10210,8 +10268,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10222,12 +10280,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.09, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10235,523 +10293,723 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 395.28, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 3.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 468.37, + "tps_std": 1.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 4.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 79.42, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 3.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 89.19, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 405.35, + "tps_mean": 398.35, + "tps_std": 1.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 302.82, + "tps_std": 2.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 59.13, + "tps_std": 7.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 61.26, + "tps_std": 10.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 418.46, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 466.83, + "tps_std": 1.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 4.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 454.1, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 499.43, + "tps_std": 1.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 392.5, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 3.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 311.25, + "tps_std": 0.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 451.69, "tps_std": 0.62, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 310.92, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 86.8, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 82.85, - "tps_std": 0.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 404.79, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 472.91, - "tps_std": 1.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 91.08, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 93.26, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 368.33, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 3.71, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 311.83, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 80.07, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 27.01, @@ -10760,8 +11018,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10772,7 +11030,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 4.0, + "tps_mean": 4.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -10785,8 +11043,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10796,20 +11054,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 324.43, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", @@ -10863,12 +11149,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 135.01, - "tps_std": 0.28, + "tps_mean": 129.49, + "tps_std": 0.34, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10876,8 +11162,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10888,12 +11174,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 4.03, + "tps_mean": 4.06, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10901,8 +11187,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10913,12 +11199,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 137.76, - "tps_std": 0.25, + "tps_mean": 137.67, + "tps_std": 1.25, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10926,8 +11212,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10938,12 +11224,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 4.03, + "tps_mean": 4.06, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, @@ -10951,8 +11237,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -10963,8 +11249,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 727.59, - "tps_std": 1.45, + "tps_mean": 728.7, + "tps_std": 1.28, "error": false, "error_type": null, "backend": "ROCm", @@ -10976,8 +11262,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -10988,7 +11274,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 76.22, + "tps_mean": 76.63, "tps_std": 0.03, "error": false, "error_type": null, @@ -11001,8 +11287,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11013,8 +11299,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 750.3, - "tps_std": 1.03, + "tps_mean": 752.52, + "tps_std": 0.83, "error": false, "error_type": null, "backend": "ROCm", @@ -11026,8 +11312,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11038,7 +11324,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 69.96, + "tps_mean": 70.93, "tps_std": 0.02, "error": false, "error_type": null, @@ -11051,8 +11337,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11063,12 +11349,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 728.24, - "tps_std": 0.55, + "tps_mean": 729.33, + "tps_std": 1.93, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 3.88, "file_size_gib": 1.8, @@ -11076,8 +11362,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11088,12 +11374,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 75.89, + "tps_mean": 76.79, "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 3.88, "file_size_gib": 1.8, @@ -11101,8 +11387,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11113,12 +11399,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 643.29, - "tps_std": 0.97, + "tps_mean": 645.25, + "tps_std": 0.89, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 3.88, "file_size_gib": 1.8, @@ -11126,8 +11412,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -11138,12 +11424,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 69.53, + "tps_mean": 70.31, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 3.88, "file_size_gib": 1.8, @@ -11151,120 +11437,20 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1812.73, - "tps_std": 7.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 76.55, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1548.2, - "tps_std": 4.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 69.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 729.03, - "tps_std": 0.75, + "tps_mean": 2033.46, + "tps_std": 5.16, "error": false, "error_type": null, "backend": "ROCm", @@ -11274,22 +11460,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 76.59, - "tps_std": 0.03, + "tps_mean": 76.47, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -11299,22 +11485,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 651.26, - "tps_std": 1.22, + "tps_mean": 2276.86, + "tps_std": 9.6, "error": false, "error_type": null, "backend": "ROCm", @@ -11324,22 +11510,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 69.44, - "tps_std": 0.01, + "tps_mean": 70.76, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -11349,122 +11535,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1799.45, - "tps_std": 7.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 75.43, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 2267.56, - "tps_std": 6.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 68.27, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 729.58, - "tps_std": 0.87, + "tps_mean": 727.18, + "tps_std": 2.22, "error": false, "error_type": null, "backend": "ROCm", @@ -11474,22 +11560,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 75.48, - "tps_std": 0.02, + "tps_mean": 75.65, + "tps_std": 0.74, "error": false, "error_type": null, "backend": "ROCm", @@ -11499,21 +11585,96 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 750.44, + "tps_mean": 740.27, + "tps_std": 10.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 70.76, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 2035.38, + "tps_std": 4.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 75.4, "tps_std": 0.8, "error": false, "error_type": null, @@ -11524,22 +11685,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, "fa": true, - "test": "tg128", - "tps_mean": 68.27, - "tps_std": 0.01, + "test": "pp512", + "tps_mean": 1515.55, + "tps_std": 8.1, "error": false, "error_type": null, "backend": "ROCm", @@ -11549,122 +11710,47 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1812.27, - "tps_std": 4.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 76.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1510.06, - "tps_std": 4.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 69.58, - "tps_std": 0.02, + "tps_mean": 70.2, + "tps_std": 0.39, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 729.81, - "tps_std": 1.15, + "tps_mean": 714.75, + "tps_std": 27.98, "error": false, "error_type": null, "backend": "ROCm", @@ -11674,22 +11760,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 76.03, - "tps_std": 0.04, + "tps_mean": 66.1, + "tps_std": 5.25, "error": false, "error_type": null, "backend": "ROCm", @@ -11699,22 +11785,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 645.48, - "tps_std": 1.4, + "tps_mean": 596.86, + "tps_std": 37.66, "error": false, "error_type": null, "backend": "ROCm", @@ -11724,22 +11810,22 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 69.67, - "tps_std": 0.02, + "tps_mean": 58.75, + "tps_std": 3.09, "error": false, "error_type": null, "backend": "ROCm", @@ -11749,694 +11835,622 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1628.18, - "tps_std": 1.73, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 84.23, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 947.36, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 60.35, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1529.98, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 86.95, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1498.81, - "tps_std": 1.7, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 81.29, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 353.66, - "tps_std": 0.64, + "tps_mean": 2014.6, + "tps_std": 24.35, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 33.65, - "tps_std": 0.0, + "tps_mean": 59.16, + "tps_std": 3.76, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, "test": "pp512", - "tps_mean": 352.4, - "tps_std": 1.12, + "tps_mean": 2191.77, + "tps_std": 78.21, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 31.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 321.54, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 33.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 604.24, - "tps_std": 4.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 548.27, + "tps_mean": 54.32, "tps_std": 2.65, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1991.71, + "tps_std": 2.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 56.37, + "tps_std": 3.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2096.22, + "tps_std": 4.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 64.88, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 2027.41, + "tps_std": 4.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 77.12, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1550.55, + "tps_std": 4.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 33.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 355.23, - "tps_std": 1.71, + "tps_mean": 70.54, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": "tg128", - "tps_mean": 33.66, - "tps_std": 0.0, + "test": "pp512", + "tps_mean": 1992.48, + "tps_std": 7.34, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 323.79, - "tps_std": 0.87, + "fa": false, + "test": "tg128", + "tps_mean": 77.05, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": "tg128", - "tps_mean": 33.04, - "tps_std": 0.0, + "test": "pp512", + "tps_mean": 1474.15, + "tps_std": 1.44, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 592.27, - "tps_std": 5.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 33.68, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 735.02, - "tps_std": 5.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 33.34, + "tps_mean": 70.44, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1593.62, + "tps_std": 2.9, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 85.26, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 936.52, + "tps_std": 2.35, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 60.89, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1515.05, + "tps_std": 2.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 87.54, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1476.16, + "tps_std": 5.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 82.48, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 353.49, - "tps_std": 1.71, + "tps_mean": 355.59, + "tps_std": 0.86, "error": false, "error_type": null, "backend": "ROCm", @@ -12446,21 +12460,171 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 33.63, + "tps_mean": 33.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 390.43, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 33.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 355.94, + "tps_std": 1.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 33.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 322.57, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 33.3, "tps_std": 0.0, "error": false, "error_type": null, @@ -12471,22 +12635,22 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, "test": "pp512", - "tps_mean": 388.5, - "tps_std": 1.06, + "tps_mean": 622.16, + "tps_std": 6.71, "error": false, "error_type": null, "backend": "ROCm", @@ -12496,18 +12660,268 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 33.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 743.09, + "tps_std": 4.89, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 33.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 354.98, + "tps_std": 0.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 33.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 390.67, + "tps_std": 0.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 33.79, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 617.0, + "tps_std": 4.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 33.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 543.39, + "tps_std": 5.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, "fa": true, "test": "tg128", "tps_mean": 33.28, @@ -12521,10 +12935,310 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 354.18, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 33.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 322.46, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 33.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 643.61, + "tps_std": 7.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 33.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 736.33, + "tps_std": 3.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 33.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 651.63, + "tps_std": 3.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 33.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 738.84, + "tps_std": 9.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 33.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -12535,12 +13249,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 598.68, - "tps_std": 9.32, + "tps_mean": 649.28, + "tps_std": 0.87, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12548,8 +13262,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12560,12 +13274,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 33.75, + "tps_mean": 33.99, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12573,8 +13287,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12585,12 +13299,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 546.3, - "tps_std": 3.37, + "tps_mean": 550.01, + "tps_std": 3.85, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12598,8 +13312,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12610,12 +13324,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 33.04, + "tps_mean": 33.38, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12623,8 +13337,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12635,8 +13349,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 354.34, - "tps_std": 0.67, + "tps_mean": 659.79, + "tps_std": 3.13, "error": false, "error_type": null, "backend": "ROCm", @@ -12648,8 +13362,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12660,7 +13374,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 33.76, + "tps_mean": 34.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -12673,8 +13387,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12685,8 +13399,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 324.26, - "tps_std": 0.8, + "tps_mean": 553.65, + "tps_std": 2.4, "error": false, "error_type": null, "backend": "ROCm", @@ -12698,8 +13412,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12710,7 +13424,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 33.05, + "tps_mean": 33.31, "tps_std": 0.0, "error": false, "error_type": null, @@ -12723,8 +13437,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12735,12 +13449,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 450.26, - "tps_std": 1.46, + "tps_mean": 449.86, + "tps_std": 1.68, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12748,8 +13462,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -12760,162 +13474,137 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 33.56, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 499.8, - "tps_std": 1.95, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 230.22, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 243.2, - "tps_std": 1.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.15, + "tps_mean": 34.19, "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 496.21, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 33.64, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 230.09, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 33.57, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 243.96, + "tps_std": 0.96, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, @@ -12923,8 +13612,33 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 33.79, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -12935,8 +13649,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 352.37, - "tps_std": 0.72, + "tps_mean": 353.2, + "tps_std": 0.3, "error": false, "error_type": null, "backend": "ROCm", @@ -12948,8 +13662,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12960,8 +13674,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 45.11, - "tps_std": 0.02, + "tps_mean": 45.42, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -12973,8 +13687,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -12984,20 +13698,48 @@ "env_base": "rocm6_4_2", "env_variant": "rocwmma", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 387.1, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": null + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 45.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", @@ -13029,12 +13771,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 319.23, - "tps_std": 0.62, + "tps_mean": 319.84, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13042,8 +13784,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13054,12 +13796,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 43.79, + "tps_mean": 44.43, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13067,117 +13809,167 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 589.45, - "tps_std": 4.75, + "tps_mean": 606.86, + "tps_std": 5.18, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 45.0, - "tps_std": 0.0, + "tps_mean": 45.26, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 539.93, - "tps_std": 1.23, + "tps_mean": 732.72, + "tps_std": 4.06, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 44.01, - "tps_std": 0.0, + "tps_mean": 45.14, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", "fa": false, + "test": "pp512", + "tps_mean": 351.42, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 45.39, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, "test": null, "tps_mean": null, "tps_std": null, @@ -13190,19 +13982,19 @@ "file_size_gib": null, "name_params_b": null, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": null }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, "test": "pp512", - "tps_mean": 323.04, - "tps_std": 0.94, + "tps_mean": 608.2, + "tps_std": 7.04, "error": false, "error_type": null, "backend": "ROCm", @@ -13212,21 +14004,21 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, "test": "tg128", - "tps_mean": 44.01, + "tps_mean": 45.4, "tps_std": 0.01, "error": false, "error_type": null, @@ -13237,10 +14029,160 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 533.95, + "tps_std": 3.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 44.41, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 352.53, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 45.41, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 320.78, + "tps_std": 0.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 44.49, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -13251,12 +14193,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 586.82, - "tps_std": 5.23, + "tps_mean": 635.84, + "tps_std": 5.72, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13264,8 +14206,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13276,12 +14218,12 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 44.72, - "tps_std": 0.3, + "tps_mean": 45.26, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13289,8 +14231,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13301,12 +14243,12 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 684.17, - "tps_std": 67.05, + "tps_mean": 708.36, + "tps_std": 12.96, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13314,8 +14256,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13326,12 +14268,12 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 44.14, - "tps_std": 0.27, + "tps_mean": 44.85, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13339,8 +14281,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13351,8 +14293,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 350.89, - "tps_std": 1.88, + "tps_mean": 650.68, + "tps_std": 9.08, "error": false, "error_type": null, "backend": "ROCm", @@ -13364,8 +14306,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13376,7 +14318,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 44.93, + "tps_mean": 45.26, "tps_std": 0.01, "error": false, "error_type": null, @@ -13389,8 +14331,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -13400,110 +14342,113 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 734.35, + "tps_std": 10.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 589.82, - "tps_std": 5.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 45.12, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 540.27, - "tps_std": 2.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 43.89, + "tps_mean": 44.85, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 646.07, + "tps_std": 6.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 45.5, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 541.57, + "tps_std": 3.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13511,8 +14456,33 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 44.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -13523,8 +14493,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 354.6, - "tps_std": 1.2, + "tps_mean": 657.58, + "tps_std": 3.78, "error": false, "error_type": null, "backend": "ROCm", @@ -13536,8 +14506,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13548,7 +14518,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 45.04, + "tps_mean": 45.56, "tps_std": 0.01, "error": false, "error_type": null, @@ -13561,8 +14531,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13573,8 +14543,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 319.46, - "tps_std": 0.48, + "tps_mean": 550.79, + "tps_std": 2.99, "error": false, "error_type": null, "backend": "ROCm", @@ -13586,8 +14556,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13598,7 +14568,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 43.9, + "tps_mean": 44.41, "tps_std": 0.0, "error": false, "error_type": null, @@ -13611,8 +14581,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13623,12 +14593,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 488.47, - "tps_std": 2.3, + "tps_mean": 485.54, + "tps_std": 2.45, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13636,8 +14606,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13648,12 +14618,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 48.21, - "tps_std": 0.02, + "tps_mean": 49.29, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13661,8 +14631,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13673,12 +14643,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 547.53, - "tps_std": 3.03, + "tps_mean": 540.81, + "tps_std": 2.56, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13686,8 +14656,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13698,12 +14668,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 47.49, - "tps_std": 0.08, + "tps_mean": 48.25, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13711,8 +14681,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13723,12 +14693,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 239.44, - "tps_std": 1.23, + "tps_mean": 239.24, + "tps_std": 1.27, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13736,8 +14706,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13748,12 +14718,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 49.15, - "tps_std": 0.02, + "tps_mean": 50.39, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13761,8 +14731,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13773,12 +14743,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 255.37, - "tps_std": 1.68, + "tps_mean": 255.5, + "tps_std": 1.49, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13786,8 +14756,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13798,12 +14768,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 49.31, - "tps_std": 0.08, + "tps_mean": 50.41, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 59.02, @@ -13811,8 +14781,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -13823,8 +14793,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 324.31, - "tps_std": 4.5, + "tps_mean": 324.3, + "tps_std": 4.23, "error": false, "error_type": null, "backend": "ROCm", @@ -13836,8 +14806,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -13848,7 +14818,257 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 26.87, + "tps_mean": 27.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 342.14, + "tps_std": 4.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 27.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 324.36, + "tps_std": 4.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 27.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 304.23, + "tps_std": 3.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 26.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1198.51, + "tps_std": 10.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 27.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1506.44, + "tps_std": 7.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 27.1, "tps_std": 0.01, "error": false, "error_type": null, @@ -13859,472 +15079,22 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 343.3, - "tps_std": 5.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 26.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 322.55, - "tps_std": 4.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.9, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 304.86, - "tps_std": 3.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 26.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1135.9, - "tps_std": 9.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 26.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1011.32, - "tps_std": 4.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 26.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 313.05, - "tps_std": 6.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 26.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 301.3, - "tps_std": 4.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 26.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1130.14, - "tps_std": 7.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 26.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1502.62, - "tps_std": 12.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 26.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 319.92, - "tps_std": 6.39, + "tps_mean": 326.8, + "tps_std": 4.56, "error": false, "error_type": null, "backend": "ROCm", @@ -14334,20 +15104,170 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", + "tps_mean": 27.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 350.18, + "tps_std": 5.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 27.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1185.57, + "tps_std": 6.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 27.12, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1000.77, + "tps_std": 2.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", "tps_mean": 26.83, "tps_std": 0.0, "error": false, @@ -14359,10 +15279,260 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 322.0, + "tps_std": 4.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 27.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 303.26, + "tps_std": 4.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 26.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1256.75, + "tps_std": 10.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 27.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1481.17, + "tps_std": 9.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 27.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1202.19, + "tps_std": 5.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 27.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -14373,8 +15543,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 338.36, - "tps_std": 5.02, + "tps_mean": 1422.9, + "tps_std": 11.48, "error": false, "error_type": null, "backend": "ROCm", @@ -14386,8 +15556,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -14398,7 +15568,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 26.71, + "tps_mean": 27.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -14411,8 +15581,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -14423,12 +15593,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1130.86, - "tps_std": 14.88, + "tps_mean": 1253.01, + "tps_std": 23.2, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14436,8 +15606,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -14448,37 +15618,62 @@ "env_variant": null, "fa": false, "test": "tg128", + "tps_mean": 27.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1005.24, + "tps_std": 32.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", "tps_mean": 26.89, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1007.82, - "tps_std": 22.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14486,48 +15681,48 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", + "env": "rocm7_rc-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1220.02, + "tps_std": 12.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, "test": "tg128", - "tps_mean": 26.66, + "tps_mean": 27.17, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 321.8, - "tps_std": 6.18, - "error": false, - "error_type": null, - "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 20.91, @@ -14536,33 +15731,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 26.83, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", - "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -14573,8 +15743,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 302.84, - "tps_std": 5.01, + "tps_mean": 985.58, + "tps_std": 10.64, "error": false, "error_type": null, "backend": "ROCm", @@ -14586,8 +15756,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -14598,7 +15768,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 26.61, + "tps_mean": 26.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -14611,8 +15781,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -14623,12 +15793,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 369.6, - "tps_std": 1.3, + "tps_mean": 367.61, + "tps_std": 1.9, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14636,8 +15806,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14648,12 +15818,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 8.72, + "tps_mean": 8.69, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14661,8 +15831,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14673,12 +15843,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 389.96, - "tps_std": 1.87, + "tps_mean": 386.12, + "tps_std": 1.98, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14686,8 +15856,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14698,12 +15868,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 8.7, + "tps_mean": 8.66, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14711,8 +15881,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14723,12 +15893,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 318.04, - "tps_std": 1.5, + "tps_mean": 315.56, + "tps_std": 1.4, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14736,8 +15906,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14748,12 +15918,12 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 7.89, + "tps_mean": 7.86, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14761,8 +15931,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14773,12 +15943,12 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 334.64, - "tps_std": 1.46, + "tps_mean": 333.31, + "tps_std": 1.47, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14786,8 +15956,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14798,12 +15968,12 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 7.9, + "tps_mean": 7.92, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, @@ -14811,8 +15981,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -14823,8 +15993,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 581.92, - "tps_std": 2.0, + "tps_mean": 582.6, + "tps_std": 4.9, "error": false, "error_type": null, "backend": "ROCm", @@ -14836,8 +16006,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -14848,7 +16018,107 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 64.34, + "tps_mean": 64.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 644.05, + "tps_std": 3.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_2-rocwmma", + "env_base": "rocm6_4_2", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 64.63, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 581.11, + "tps_std": 2.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_2", + "env_base": "rocm6_4_2", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 65.0, "tps_std": 0.02, "error": false, "error_type": null, @@ -14859,22 +16129,22 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", + "env": "rocm6_4_2", "env_base": "rocm6_4_2", - "env_variant": "rocwmma", + "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 642.4, - "tps_std": 3.59, + "tps_mean": 522.29, + "tps_std": 2.36, "error": false, "error_type": null, "backend": "ROCm", @@ -14884,21 +16154,21 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", + "env": "rocm6_4_2", "env_base": "rocm6_4_2", - "env_variant": "rocwmma", + "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 63.74, + "tps_mean": 63.63, "tps_std": 0.0, "error": false, "error_type": null, @@ -14909,222 +16179,22 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 582.94, - "tps_std": 2.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 522.14, - "tps_std": 1.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 62.97, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1128.54, - "tps_std": 2.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.39, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1005.66, - "tps_std": 1.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 63.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 585.03, - "tps_std": 1.84, + "tps_mean": 1184.03, + "tps_std": 8.37, "error": false, "error_type": null, "backend": "ROCm", @@ -15134,21 +16204,171 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 64.36, + "tps_mean": 65.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1480.28, + "tps_std": 9.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 64.45, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 584.04, + "tps_std": 2.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 64.87, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 643.25, + "tps_std": 3.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 64.67, "tps_std": 0.01, "error": false, "error_type": null, @@ -15159,22 +16379,22 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, "test": "pp512", - "tps_mean": 528.92, - "tps_std": 2.02, + "tps_mean": 1171.02, + "tps_std": 7.04, "error": false, "error_type": null, "backend": "ROCm", @@ -15184,21 +16404,71 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 64.94, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 996.31, + "tps_std": 6.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 63.0, + "tps_mean": 63.68, "tps_std": 0.01, "error": false, "error_type": null, @@ -15209,10 +16479,110 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 582.51, + "tps_std": 2.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 64.89, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 522.63, + "tps_std": 1.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 63.66, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" } }, { @@ -15223,12 +16593,12 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1124.54, - "tps_std": 9.14, + "tps_mean": 1236.64, + "tps_std": 11.2, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, @@ -15236,8 +16606,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -15248,108 +16618,83 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 64.19, + "tps_mean": 64.78, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1474.7, - "tps_std": 11.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 63.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 583.69, - "tps_std": 2.09, - "error": false, - "error_type": null, - "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1460.58, + "tps_std": 11.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, "test": "tg128", "tps_mean": 64.26, - "tps_std": 0.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1299.34, + "tps_std": 7.77, "error": false, "error_type": null, "backend": "ROCm", @@ -15361,8 +16706,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "de219279", + "number": "6181" } }, { @@ -15371,139 +16716,139 @@ "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 642.92, - "tps_std": 1.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 63.28, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1125.6, - "tps_std": 1.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 64.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 997.74, - "tps_std": 8.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 63.0, + "tps_mean": 64.85, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", - "ngl": 999, + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1516.33, + "tps_std": 21.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 64.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1246.14, + "tps_std": 8.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 65.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1010.38, + "tps_std": 6.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, @@ -15511,8 +16856,33 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 63.49, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" } }, { @@ -15523,8 +16893,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 584.02, - "tps_std": 1.44, + "tps_mean": 1303.74, + "tps_std": 6.94, "error": false, "error_type": null, "backend": "ROCm", @@ -15536,8 +16906,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -15548,7 +16918,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 64.5, + "tps_mean": 65.1, "tps_std": 0.01, "error": false, "error_type": null, @@ -15561,8 +16931,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -15573,8 +16943,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 525.48, - "tps_std": 1.39, + "tps_mean": 1037.92, + "tps_std": 11.67, "error": false, "error_type": null, "backend": "ROCm", @@ -15586,8 +16956,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -15598,7 +16968,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 63.04, + "tps_mean": 63.63, "tps_std": 0.01, "error": false, "error_type": null, @@ -15611,8 +16981,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "79c1160b", - "number": "6123" + "hash": "de219279", + "number": "6181" } }, { @@ -15623,12 +16993,12 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1218.18, - "tps_std": 8.08, + "tps_mean": 1220.69, + "tps_std": 8.95, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, @@ -15636,8 +17006,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } }, { @@ -15648,137 +17018,162 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 69.76, + "tps_mean": 71.42, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1467.61, + "tps_std": 12.7, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 69.47, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 651.21, + "tps_std": 5.24, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 72.35, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 732.35, + "tps_std": 7.51, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 72.05, "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1482.59, - "tps_std": 12.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 68.63, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 649.86, - "tps_std": 5.16, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 70.72, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 728.71, - "tps_std": 8.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, + "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, @@ -15786,893 +17181,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 70.49, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.76, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.69, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 99.09, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 81.54, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log", - "build": null - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 31.63, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta-hblt0", - "env_base": "rocm7_beta", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 99.41, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 106.7, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 33.87, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 34.48, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 99.16, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 81.56, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log", - "build": { - "hash": "79c1160b", - "number": "6123" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log", - "build": null - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 72.73, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 5.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 73.47, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 5.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 78.79, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 5.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 80.58, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 5.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.55, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log", - "build": { - "hash": "34c9d765", - "number": "6122" + "hash": "1fe00296", + "number": "6182" } } ]