From 1acda692247e1e5695642b1f1127c16611e950de Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Mon, 18 Aug 2025 22:25:28 +0100 Subject: [PATCH] updated benchmarks with reference llama 2 model --- README.md | 14 +- .../llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log | 10 + ...lama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log | 10 + ...ma-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log | 10 + ...7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log | 10 + .../results/llama-2-7b.Q4_0__rocm6_4_3.log | 10 + .../llama-2-7b.Q4_0__rocm6_4_3__fa1.log | 10 + .../llama-2-7b.Q4_0__rocm6_4_3__hblt0.log | 10 + ...llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log | 10 + .../llama-2-7b.Q4_0__rocm7_rc-rocwmma.log | 10 + ...llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log | 10 + ...ama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log | 10 + ...-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log | 10 + .../results/llama-2-7b.Q4_0__rocm7_rc.log | 10 + .../llama-2-7b.Q4_0__rocm7_rc__fa1.log | 10 + .../llama-2-7b.Q4_0__rocm7_rc__hblt0.log | 10 + .../llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log | 10 + .../llama-2-7b.Q4_0__vulkan_amdvlk.log | 8 + .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 8 + .../results/llama-2-7b.Q4_0__vulkan_radv.log | 8 + .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 8 + docs/benchmarks.md | 92 +- docs/results.json | 1002 ++++++++++++++++- 23 files changed, 1247 insertions(+), 53 deletions(-) create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log create mode 100644 
benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log diff --git a/README.md b/README.md index 10704ba..48e7ae9 100644 --- a/README.md +++ b/README.md @@ -155,24 +155,24 @@ Benchmarks were analysed with **error-aware ties** (mean ± σ). 
If two backends **Prompt Processing (pp512)** | Backend | 1st | 2nd | 3rd | | --- | ---: | ---: | ---: | -| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 9 | 5 | 0 | +| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 9 | 6 | 0 | +| Vulkan AMDVLK | 4 | 0 | 2 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 3 | 3 | 8 | -| Vulkan AMDVLK | 3 | 0 | 2 | -| ROCm 7 RC + ROCWMMA + hipBLASLt | 1 | 8 | 4 | +| ROCm 7 RC + ROCWMMA + hipBLASLt | 1 | 8 | 5 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 0 | 0 | 1 | | Vulkan RADV | 0 | 0 | 1 | **Token Generation (tg128)** | Backend | 1st | 2nd | 3rd | | --- | ---: | ---: | ---: | -| Vulkan RADV | 13 | 0 | 0 | +| Vulkan RADV | 14 | 0 | 0 | | ROCm 6.4.3 (hipBLASLt) | 3 | 0 | 1 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 1 | 4 | 3 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 1 | 2 | 4 | | ROCm 6.4.3 (hipBLASLt OFF) | 1 | 1 | 1 | -| ROCm 7 RC (hipBLASLt OFF) | 1 | 1 | 1 | +| ROCm 7 RC (hipBLASLt) | 1 | 1 | 4 | +| ROCm 7 RC (hipBLASLt OFF) | 1 | 1 | 2 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 1 | 1 | 1 | -| ROCm 7 RC (hipBLASLt) | 1 | 0 | 4 | | Vulkan AMDVLK | 0 | 10 | 0 | | ROCm 7 RC + ROCWMMA + hipBLASLt | 0 | 1 | 2 | @@ -181,6 +181,8 @@ Benchmarks were analysed with **error-aware ties** (mean ± σ). If two backends - **Fastest token generation:** Vulkan RADV (most 1st-place finishes). - **Balanced choice:** ROCm 6.4.3 + ROCWMMA (hipBLASLt) (consistently near the top across PP/TG). +> **Note (ROCm):** Toolboxes enable **hipBLASLt** by default. The benchmark suite also runs **hipBLASLt OFF** variants to show its impact. + 📄 Full per-model analysis: [docs/benchmarks.md](docs/benchmarks.md) ## 4. 
Memory Planning & VRAM Estimator diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log new file mode 100644 index 0000000..2220109 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 981.76 ± 1.61 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.26 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log new file mode 100644 index 0000000..79d7534 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1096.97 ± 5.09 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.33 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log new file mode 100644 index 0000000..54ebe40 --- /dev/null +++ 
b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.00 ± 0.44 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.39 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3d3918a --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 367.46 ± 0.31 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.20 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log new file mode 100644 index 0000000..2c94893 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| 
model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 978.30 ± 1.98 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.39 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log new file mode 100644 index 0000000..fd39b3e --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 686.88 ± 0.38 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.80 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log new file mode 100644 index 0000000..689c168 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.07 ± 0.50 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | 
ROCm | 99 | 0 | tg128 | 49.36 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log new file mode 100644 index 0000000..eca19dc --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 307.39 ± 0.70 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.75 ± 0.00 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log new file mode 100644 index 0000000..075f5ba --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 978.15 ± 1.18 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.15 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log new file mode 100644 index 0000000..669a6d8 --- /dev/null +++ 
b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1089.54 ± 1.93 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 46.47 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log new file mode 100644 index 0000000..761a77a --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 863.15 ± 2.90 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.09 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..655dfd3 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 947.88 ± 1.69 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 46.48 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log new file mode 100644 index 0000000..8b649c5 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.59 ± 2.44 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.38 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log new file mode 100644 index 0000000..05d74ad --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 684.81 ± 1.14 | +| llama 7B Q4_0 | 
3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.97 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log new file mode 100644 index 0000000..27bd0d7 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 865.92 ± 1.53 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.40 ± 0.00 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log new file mode 100644 index 0000000..28a8543 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 630.67 ± 1.16 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.83 ± 0.01 | + +build: de219279 (6181) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log new file mode 100644 index 0000000..20052b6 --- /dev/null +++ 
b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1305.67 ± 1.36 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 48.48 ± 0.11 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a9dca39 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1377.39 ± 0.62 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 47.91 ± 0.01 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log new file mode 100644 index 0000000..f19f729 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat 
+| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 875.74 ± 6.47 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 52.85 ± 0.12 | + +build: 1fe00296 (6182) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log new file mode 100644 index 0000000..3c88a39 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 957.61 ± 5.26 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 52.16 ± 0.08 | + +build: 1fe00296 (6182) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 0a326fc..5e650aa 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -28,7 +28,7 @@ **Backends in this dataset:** ROCm 7 RC + ROCWMMA + hipBLASLt, ROCm 7 RC (hipBLASLt), ROCm 7 RC (hipBLASLt OFF), ROCm 7 RC + ROCWMMA (hipBLASLt OFF), ROCm 6.4.3 (hipBLASLt), ROCm 6.4.3 (hipBLASLt OFF), ROCm 6.4.3 + ROCWMMA (hipBLASLt), ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF), Vulkan AMDVLK, Vulkan RADV -**ROCm 7 hipBLASLt policy:** Toolboxes ship with **hipBLASLt enabled** by default (`ROCBLAS_USE_HIPBLASLT=1`). The benchmark script also runs **hipBLASLt OFF** variants (`-hblt0`) to measure its effect. 
+**ROCm hipBLASLt policy:** Toolboxes ship with **hipBLASLt enabled** by default (`ROCBLAS_USE_HIPBLASLT=1`). The benchmark script also runs **hipBLASLt OFF** variants (`-hblt0`) to measure its effect. --- @@ -38,24 +38,24 @@ **Prompt Processing (pp512)** | Backend | 1st | 2nd | 3rd | | --- | ---: | ---: | ---: | -| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 9 | 5 | 0 | +| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 9 | 6 | 0 | +| Vulkan AMDVLK | 4 | 0 | 2 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 3 | 3 | 8 | -| Vulkan AMDVLK | 3 | 0 | 2 | -| ROCm 7 RC + ROCWMMA + hipBLASLt | 1 | 8 | 4 | +| ROCm 7 RC + ROCWMMA + hipBLASLt | 1 | 8 | 5 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 0 | 0 | 1 | | Vulkan RADV | 0 | 0 | 1 | **Token Generation (tg128)** | Backend | 1st | 2nd | 3rd | | --- | ---: | ---: | ---: | -| Vulkan RADV | 13 | 0 | 0 | +| Vulkan RADV | 14 | 0 | 0 | | ROCm 6.4.3 (hipBLASLt) | 3 | 0 | 1 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 1 | 4 | 3 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 1 | 2 | 4 | | ROCm 6.4.3 (hipBLASLt OFF) | 1 | 1 | 1 | -| ROCm 7 RC (hipBLASLt OFF) | 1 | 1 | 1 | +| ROCm 7 RC (hipBLASLt) | 1 | 1 | 4 | +| ROCm 7 RC (hipBLASLt OFF) | 1 | 1 | 2 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 1 | 1 | 1 | -| ROCm 7 RC (hipBLASLt) | 1 | 0 | 4 | | Vulkan AMDVLK | 0 | 10 | 0 | | ROCm 7 RC + ROCWMMA + hipBLASLt | 0 | 1 | 2 | @@ -63,20 +63,20 @@ For any model+quant where both backends succeeded, this counts who was faster (ties when equal). 
| Comparison | Test | A wins | B wins | Ties | Total | | --- | --- | ---: | ---: | ---: | ---: | -| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan AMDVLK | pp512 | 11 | 4 | 0 | 15 | -| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan AMDVLK | tg128 | 4 | 10 | 1 | 15 | -| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan RADV | pp512 | 14 | 2 | 0 | 16 | -| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan RADV | tg128 | 3 | 13 | 0 | 16 | -| Vulkan AMDVLK vs Vulkan RADV | pp512 | 13 | 2 | 0 | 15 | -| Vulkan AMDVLK vs Vulkan RADV | tg128 | 2 | 13 | 0 | 15 | +| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan AMDVLK | pp512 | 11 | 5 | 0 | 16 | +| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan AMDVLK | tg128 | 4 | 11 | 1 | 16 | +| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan RADV | pp512 | 15 | 2 | 0 | 17 | +| ROCm 7 RC + ROCWMMA + hipBLASLt vs Vulkan RADV | tg128 | 3 | 14 | 0 | 17 | +| Vulkan AMDVLK vs Vulkan RADV | pp512 | 14 | 2 | 0 | 16 | +| Vulkan AMDVLK vs Vulkan RADV | tg128 | 2 | 14 | 0 | 16 | ### Average ranks **Prompt Processing (pp512)** | Backend | Avg Rank (↓ is better) | | --- | ---: | -| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 1.36 | -| Vulkan AMDVLK | 1.8 | -| ROCm 7 RC + ROCWMMA + hipBLASLt | 2.23 | +| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 1.4 | +| Vulkan AMDVLK | 1.67 | +| ROCm 7 RC + ROCWMMA + hipBLASLt | 2.29 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 2.36 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 3.0 | | Vulkan RADV | 3.0 | @@ -88,11 +88,11 @@ For any model+quant where both backends succeeded, this counts who was faster (t | ROCm 6.4.3 (hipBLASLt) | 1.5 | | Vulkan AMDVLK | 2.0 | | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 2.0 | -| ROCm 7 RC (hipBLASLt OFF) | 2.0 | | ROCm 6.4.3 (hipBLASLt OFF) | 2.0 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 2.25 | +| ROCm 7 RC (hipBLASLt OFF) | 2.25 | | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 2.43 | -| ROCm 7 RC (hipBLASLt) | 2.6 | +| ROCm 7 RC (hipBLASLt) | 2.5 | | ROCm 7 RC + ROCWMMA + hipBLASLt | 2.67 | --- @@ -103,47 +103,47 @@ For any model+quant where both backends 
succeeded, this counts who was faster (t Median % change when **Flash Attention ON vs OFF**, paired by model+quant, per backend: | Backend | pp512 Δ% (median, min..max, n) | tg128 Δ% (median, min..max, n) | | --- | --- | --- | -| ROCm 7 RC + ROCWMMA + hipBLASLt | 8.4% (3.6..65.6), n=14 | -1.1% (-8.2..-0.3), n=14 | -| ROCm 7 RC (hipBLASLt) | -20.2% (-27.8..6.5), n=10 | -1.4% (-8.5..3.0), n=10 | -| ROCm 7 RC (hipBLASLt OFF) | -20.4% (-28.2..-16.1), n=9 | -1.9% (-8.6..0.1), n=9 | -| ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 5.8% (1.3..24.1), n=16 | -1.1% (-7.4..15.1), n=16 | -| ROCm 6.4.3 (hipBLASLt) | -19.5% (-25.7..-11.9), n=12 | -1.2% (-6.9..0.8), n=12 | -| ROCm 6.4.3 (hipBLASLt OFF) | -10.3% (-22.3..3.6), n=9 | -1.6% (-11.1..0.0), n=9 | -| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 10.9% (3.9..25.7), n=15 | -0.4% (-7.5..3.0), n=15 | -| ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 6.4% (1.8..12.3), n=10 | -0.6% (-6.5..2.3), n=10 | -| Vulkan AMDVLK | 1.1% (-45.4..20.2), n=15 | -1.5% (-28.6..0.1), n=15 | -| Vulkan RADV | 3.4% (-2.6..12.5), n=16 | 0.0% (-5.8..2.4), n=16 | +| ROCm 7 RC + ROCWMMA + hipBLASLt | 8.8% (3.6..65.6), n=15 | -1.2% (-8.2..-0.3), n=15 | +| ROCm 7 RC (hipBLASLt) | -20.7% (-30.1..6.5), n=11 | -0.9% (-8.5..3.0), n=11 | +| ROCm 7 RC (hipBLASLt OFF) | -22.9% (-28.2..-16.1), n=10 | -1.5% (-8.6..0.1), n=10 | +| ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 5.8% (1.3..24.1), n=17 | -1.4% (-7.4..15.1), n=17 | +| ROCm 6.4.3 (hipBLASLt) | -20.9% (-29.8..-11.9), n=13 | -1.2% (-6.9..0.8), n=13 | +| ROCm 6.4.3 (hipBLASLt OFF) | -10.9% (-22.3..3.6), n=10 | -1.4% (-11.1..0.0), n=10 | +| ROCm 6.4.3 + ROCWMMA (hipBLASLt) | 11.3% (3.9..25.7), n=16 | -0.7% (-7.5..3.0), n=16 | +| ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 5.9% (1.8..12.3), n=11 | -0.9% (-6.5..2.3), n=11 | +| Vulkan AMDVLK | 1.1% (-45.4..20.2), n=16 | -1.3% (-28.6..0.1), n=16 | +| Vulkan RADV | 3.7% (-2.6..12.5), n=17 | 0.0% (-5.8..2.4), n=17 | ### Impact of ROCWMMA | Context | Test | Compared Envs | Pairs | Median Δ% | | --- | 
--- | --- | ---: | ---: | -| ROCm 7 RC (hipBLASLt) | pp512 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC (hipBLASLt) | 16 | 16.3% | -| ROCm 7 RC (hipBLASLt) | tg128 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC (hipBLASLt) | 16 | -0.7% | -| ROCm 7 RC (hipBLASLt OFF) | pp512 | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) vs ROCm 7 RC (hipBLASLt OFF) | 15 | 14.6% | -| ROCm 7 RC (hipBLASLt OFF) | tg128 | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) vs ROCm 7 RC (hipBLASLt OFF) | 15 | -0.7% | -| ROCm 6.4.3 (hipBLASLt) | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt) | 15 | 17.4% | -| ROCm 6.4.3 (hipBLASLt) | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt) | 15 | -0.3% | -| ROCm 6.4.3 (hipBLASLt OFF) | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) vs ROCm 6.4.3 (hipBLASLt OFF) | 9 | 10.2% | -| ROCm 6.4.3 (hipBLASLt OFF) | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) vs ROCm 6.4.3 (hipBLASLt OFF) | 9 | 0.3% | +| ROCm 7 RC (hipBLASLt) | pp512 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC (hipBLASLt) | 17 | 17.6% | +| ROCm 7 RC (hipBLASLt) | tg128 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC (hipBLASLt) | 17 | -0.8% | +| ROCm 7 RC (hipBLASLt OFF) | pp512 | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) vs ROCm 7 RC (hipBLASLt OFF) | 16 | 14.6% | +| ROCm 7 RC (hipBLASLt OFF) | tg128 | ROCm 7 RC + ROCWMMA (hipBLASLt OFF) vs ROCm 7 RC (hipBLASLt OFF) | 16 | -0.9% | +| ROCm 6.4.3 (hipBLASLt) | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt) | 16 | 17.5% | +| ROCm 6.4.3 (hipBLASLt) | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt) | 16 | -0.3% | +| ROCm 6.4.3 (hipBLASLt OFF) | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) vs ROCm 6.4.3 (hipBLASLt OFF) | 10 | 9.7% | +| ROCm 6.4.3 (hipBLASLt OFF) | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) vs ROCm 6.4.3 (hipBLASLt OFF) | 10 | 0.2% | ### Impact of hipBLASLt | Context | Test | Compared Envs | Pairs | Median Δ% | | --- | --- | --- | ---: | ---: | -| ROCm 7 RC (no ROCWMMA) | pp512 
| ROCm 7 RC (hipBLASLt) vs ROCm 7 RC (hipBLASLt OFF) | 15 | -0.2% | -| ROCm 7 RC (no ROCWMMA) | tg128 | ROCm 7 RC (hipBLASLt) vs ROCm 7 RC (hipBLASLt OFF) | 15 | -0.1% | -| ROCm 7 RC + ROCWMMA | pp512 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 16 | 1.4% | -| ROCm 7 RC + ROCWMMA | tg128 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 16 | 0.0% | -| ROCm 6.4.3 (no ROCWMMA) | pp512 | ROCm 6.4.3 (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt OFF) | 9 | 155.5% | -| ROCm 6.4.3 (no ROCWMMA) | tg128 | ROCm 6.4.3 (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt OFF) | 9 | 0.0% | -| ROCm 6.4.3 + ROCWMMA | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 13 | 116.9% | -| ROCm 6.4.3 + ROCWMMA | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 13 | -0.0% | +| ROCm 7 RC (no ROCWMMA) | pp512 | ROCm 7 RC (hipBLASLt) vs ROCm 7 RC (hipBLASLt OFF) | 16 | 0.4% | +| ROCm 7 RC (no ROCWMMA) | tg128 | ROCm 7 RC (hipBLASLt) vs ROCm 7 RC (hipBLASLt OFF) | 16 | -0.1% | +| ROCm 7 RC + ROCWMMA | pp512 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 17 | 2.0% | +| ROCm 7 RC + ROCWMMA | tg128 | ROCm 7 RC + ROCWMMA + hipBLASLt vs ROCm 7 RC + ROCWMMA (hipBLASLt OFF) | 17 | 0.0% | +| ROCm 6.4.3 (no ROCWMMA) | pp512 | ROCm 6.4.3 (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt OFF) | 10 | 154.8% | +| ROCm 6.4.3 (no ROCWMMA) | tg128 | ROCm 6.4.3 (hipBLASLt) vs ROCm 6.4.3 (hipBLASLt OFF) | 10 | 0.0% | +| ROCm 6.4.3 + ROCWMMA | pp512 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 14 | 117.0% | +| ROCm 6.4.3 + ROCWMMA | tg128 | ROCm 6.4.3 + ROCWMMA (hipBLASLt) vs ROCm 6.4.3 + ROCWMMA (hipBLASLt OFF) | 14 | -0.0% | ### Vulkan: AMDVLK vs RADV Head-to-head wins with selected Flash Attention filter: | Test | AMDVLK wins | RADV wins | Ties | Total | | --- | ---: | ---: | ---: | ---: | -| pp512 | 13 | 2 | 0 | 15 | -| tg128 | 2 | 13 | 0 | 15 | +| pp512 | 14 | 2 | 
0 | 16 | +| tg128 | 2 | 14 | 0 | 16 | --- diff --git a/docs/results.json b/docs/results.json index ccecf32..f632b47 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2025-08-17T10:57:41Z", + "generated_at": "2025-08-18T21:21:31Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { @@ -14710,6 +14710,1006 @@ "hash": "1fe00296", "number": "6182" } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 981.76, + "tps_std": 1.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 49.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1096.97, + "tps_std": 5.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", + 
"build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 48.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 348.0, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 367.46, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + 
"mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 48.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 978.3, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 49.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 686.88, + 
"tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 48.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 348.07, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + 
"env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 307.39, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 48.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 978.15, + "tps_std": 1.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 49.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": 
"llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1089.54, + "tps_std": 1.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 46.47, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 863.15, + "tps_std": 2.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", 
+ "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 947.88, + "tps_std": 1.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.48, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 979.59, + "tps_std": 2.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 49.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": 
"ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 684.81, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 48.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 865.92, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.4, + "tps_std": 0.0, + 
"error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 630.67, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 48.83, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "de219279", + "number": "6181" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1305.67, + "tps_std": 1.36, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + 
"env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 48.48, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1377.39, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 47.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 875.74, + "tps_std": 6.47, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + 
"model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 52.85, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 957.61, + "tps_std": 5.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 52.16, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", + "build": { + "hash": "1fe00296", + "number": "6182" + } } ] } \ No newline at end of file