From 995ad2cd38cc55ff79700c67d0d5af106ca46660 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Sat, 9 Aug 2025 11:50:27 +0100 Subject: [PATCH] Updated benchmarks --- README.md | 2 +- benchmark/run_benchmarks.log.backup | 358 -------------------- benchmark/run_loadtime_benchmark.log.backup | 331 ------------------ benchmark/summarize_results.py | 77 ----- docs/benchmarks.md | 6 +- 5 files changed, 6 insertions(+), 768 deletions(-) delete mode 100644 benchmark/run_benchmarks.log.backup delete mode 100644 benchmark/run_loadtime_benchmark.log.backup delete mode 100644 benchmark/summarize_results.py diff --git a/README.md b/README.md index 73afb27..897854f 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec i * ROCm 7.0 Beta/RC show similar performance to 6.4.2 without consistent gains. 📄 Full per-model analysis: [docs/benchmarks.md](docs/benchmarks.md) -🌐 Interactive exploration: [Live Benchmark Viewer](https://your-live-results-url) +🌐 Interactive exploration: [Live Benchmark Viewer](https://kyuz0.github.io/amd-strix-halo-toolboxes/) ## 4. Memory Planning & VRAM Estimator diff --git a/benchmark/run_benchmarks.log.backup b/benchmark/run_benchmarks.log.backup deleted file mode 100644 index 47d48fc..0000000 --- a/benchmark/run_benchmarks.log.backup +++ /dev/null @@ -1,358 +0,0 @@ -Found 11 model(s) to bench: - • /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - • /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - • /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - • /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - • /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - • /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - • /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - • /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - • /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - • /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - • /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [host] gemma-3-12b-it-UD-Q8_K_XL - → log: results/gemma-3-12b-it-UD-Q8_K_XL__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-12b-it-UD-Q8_K_XL/gemma-3-12b-it-UD-Q8_K_XL.gguf - - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - -▶ [host] gemma-3-27b-it-BF16-00001-of-00002 - → log: results/gemma-3-27b-it-BF16-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf - - * [host] gemma-3-27b-it-BF16-00001-of-00002 : FAILED - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [host] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 - → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf - - * [host] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [host] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 - → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [host] llama3.3-70.6B-Q4_K_M - → log: results/llama3.3-70.6B-Q4_K_M__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [host] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [host] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf - - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED - -▶ [host] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 - → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf - - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [host] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 - → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf - - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [host] Qwen3-30B-A3B-BF16-00001-of-00002 - → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf - - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log - → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log - → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log - → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log - → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log - → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - - -▶ [host] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 - → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__host.log - → cmd: llama-bench -ngl 99 -mmp 0 -m /home/kyuz0/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf - diff --git a/benchmark/run_loadtime_benchmark.log.backup b/benchmark/run_loadtime_benchmark.log.backup deleted file mode 100644 index e3578c3..0000000 --- a/benchmark/run_loadtime_benchmark.log.backup +++ /dev/null @@ -1,331 +0,0 @@ -Found 11 models to test with llama-cli (3 runs each) - -▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL avg=3.861s over 3 runs - -▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL avg=3.434s over 3 runs - -▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL avg=4.295s over 3 runs - -▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL avg=3.955s over 3 runs - -▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL avg=6.686s over 3 runs - -▶ [host] gemma-3-12b-it-UD-Q8_K_XL (runs: 3) - → log : loadtime_results/gemma-3-12b-it-UD-Q8_K_XL__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] gemma-3-12b-it-UD-Q8_K_XL avg=3.785s over 3 runs - -▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 avg=10.417s over 3 runs - -▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 avg=10.486s over 3 runs - -▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 avg=13.579s over 3 runs - -▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✖ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 all runs failed - -▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 avg=12.495s over 3 runs - -▶ [host] gemma-3-27b-it-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/gemma-3-27b-it-BF16-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✖ [host] gemma-3-27b-it-BF16-00001-of-00002 all runs failed - -▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 avg=26.362s over 3 runs - -▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 avg=30.024s over 3 runs - -▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 avg=30.591s over 3 runs - -▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✖ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 all runs failed - -▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 avg=35.301s over 3 runs - -▶ [host] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✖ [host] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 all runs failed - -▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=32.911s over 3 runs - -▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=32.796s over 3 runs - -▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=30.376s over 3 runs - -▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=30.604s over 3 runs - -▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=30.998s over 3 runs - -▶ [host] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 avg=31.133s over 3 runs - -▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] llama3.3-70.6B-Q4_K_M avg=14.602s over 3 runs - -▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] llama3.3-70.6B-Q4_K_M avg=9.338s over 3 runs - -▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] llama3.3-70.6B-Q4_K_M avg=8.816s over 3 runs - -▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M avg=9.176s over 3 runs - -▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] llama3.3-70.6B-Q4_K_M avg=9.887s over 3 runs - -▶ [host] llama3.3-70.6B-Q4_K_M (runs: 3) - → log : loadtime_results/llama3.3-70.6B-Q4_K_M__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] llama3.3-70.6B-Q4_K_M avg=8.979s over 3 runs - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 avg=19.365s over 2 runs - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✖ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 all runs failed - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 avg=20.045s over 3 runs - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 avg=16.752s over 3 runs - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 avg=15.776s over 3 runs - -▶ [host] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 avg=18.146s over 3 runs - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=28.435s over 3 runs - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=28.221s over 3 runs - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=32.810s over 3 runs - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=35.541s over 3 runs - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=31.792s over 3 runs - -▶ [host] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 avg=33.403s over 3 runs - -▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=35.742s over 3 runs - -▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=36.400s over 3 runs - -▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=41.626s over 3 runs - -▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=47.967s over 3 runs - -▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=40.739s over 3 runs - -▶ [host] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 (runs: 3) - → log : loadtime_results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 avg=47.723s over 3 runs - -▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=33.458s over 3 runs - -▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=35.392s over 3 runs - -▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=40.722s over 3 runs - -▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=44.883s over 3 runs - -▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=39.062s over 3 runs - -▶ [host] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 (runs: 3) - → log : loadtime_results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 avg=44.276s over 3 runs - -▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 avg=22.669s over 3 runs - -▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 avg=15.930s over 3 runs - -▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 avg=14.761s over 3 runs - -▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 avg=12.935s over 3 runs - -▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 avg=22.166s over 3 runs - -▶ [host] Qwen3-30B-A3B-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-30B-A3B-BF16-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Qwen3-30B-A3B-BF16-00001-of-00002 avg=13.034s over 3 runs - -▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=16.161s over 3 runs - -▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=14.392s over 3 runs - -▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=14.021s over 3 runs - -▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=12.940s over 3 runs - -▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=17.779s over 3 runs - -▶ [host] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 (runs: 3) - → log : loadtime_results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__host.log - → flags : -ngl 999 -fa --no-mmap -no-cnv -n 1 -✔ [host] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 avg=12.546s over 3 runs diff --git a/benchmark/summarize_results.py b/benchmark/summarize_results.py deleted file mode 100644 index 839c613..0000000 --- a/benchmark/summarize_results.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -import json -from collections import defaultdict -from statistics import mean - -# CONFIG -TOLERANCE_MULTIPLIER = 1.0 # multiplier for std dev to count as "within best" - -def within_tolerance(best_mean, best_std, contender_mean, contender_std): - # Winner if contender is within (best_mean - best_std * tol) of best_mean - return contender_mean >= (best_mean - TOLERANCE_MULTIPLIER * best_std) - -# --- Load data --- -with open("../docs/results.json", encoding="utf-8") as f: - data = json.load(f) - -runs = data["runs"] - -# --- Group by benchmark type --- -benchmarks = defaultdict(list) -for r in runs: - if r["error"]: - continue - if r["test"] in ("pp512", "tg128"): - benchmarks[r["test"]].append(r) - -summary = {} - -for bench_type, results in benchmarks.items(): - winners_count = defaultdict(int) - backend_perf = defaultdict(list) - - # Group results by model - models = defaultdict(list) - for r in results: - models[r["model_clean"]].append(r) - - for model, entries in models.items(): - # Find the best mean - best_entry = max(entries, key=lambda x: x["tps_mean"]) - best_mean = best_entry["tps_mean"] - best_std = best_entry["tps_std"] or 0 - - # Find all within tolerance - for e in entries: - if e["tps_mean"] is None: - continue - if within_tolerance(best_mean, best_std, e["tps_mean"], e["tps_std"] or 0): - label = f"{e['env']}{' (FA on)' if e['fa'] else ' (FA off)'}" - winners_count[label] += 1 - - # Collect performance data for average TPS - for e in entries: - label = f"{e['env']}{' (FA on)' if e['fa'] else ' (FA off)'}" - if e["tps_mean"] is not None: - backend_perf[label].append(e["tps_mean"]) - - # Store summary - summary[bench_type] = { - "winners": dict(sorted(winners_count.items(), key=lambda x: -x[1])), - "avg_perf": {k: round(mean(v), 2) for k, v in backend_perf.items()}, - "total_models": len(models), - } - -# --- Print human-readable analysis --- -for bench_type in ("pp512", "tg128"): - if bench_type not in summary: - continue - print(f"\n=== {bench_type.upper()} ===") - print(f"Models tested: {summary[bench_type]['total_models']}") - print("Winner counts (within tolerance):") - for backend, count in summary[bench_type]["winners"].items(): - print(f" {backend}: {count} models") - print("Average throughput (tokens/sec):") - for backend, avg in sorted(summary[bench_type]["avg_perf"].items(), key=lambda x: -x[1]): - print(f" {backend}: {avg}") - diff --git a/docs/benchmarks.md b/docs/benchmarks.md index bb46ace..e14538a 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -1,7 +1,11 @@ # AMD Strix Halo — llama.cpp Toolboxes (Benchmarks) **Live results:** [https://kyuz0.github.io/amd-strix-halo-toolboxes/](https://kyuz0.github.io/amd-strix-halo-toolboxes/) -Filter by model name, size, and quantization; select backends with or without **Flash Attention (FA)**; compare pp512 and tg128 side-by-side; winners are computed with an error-aware tolerance rule. + +- Filter by model name, size, and quantization +- Select backends with or without **Flash Attention (FA)** +- Compare pp512 and tg128 side-by-side +- Winners are computed with an error-aware tolerance rule. ---