diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..019b7bd --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.41 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 4.12 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log new file mode 100644 index 0000000..2339073 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 101.82 ± 0.34 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log new file mode 100644 index 0000000..7d68b86 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.93 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 4.13 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log new file mode 100644 index 0000000..09254e0 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 95.55 ± 0.26 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.78 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..d7afa2a --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.59 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.63 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__rpc.log new file mode 100644 index 0000000..9e62d86 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 103.11 ± 0.08 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.11 ± 0.03 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__longctx32768__rpc.log new file mode 100644 index 0000000..e5e59f5 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.03 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.63 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__rpc.log new file mode 100644 index 0000000..0abf42d --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 87.98 ± 0.29 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.10 ± 0.02 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..a1b8c39 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.20 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.60 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__rpc.log new file mode 100644 index 0000000..3a595d0 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 102.58 ± 0.04 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.74 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__longctx32768__rpc.log new file mode 100644 index 0000000..fb86185 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.88 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.58 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__rpc.log new file mode 100644 index 0000000..16e6be3 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 93.44 ± 0.29 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.01 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__longctx32768__rpc.log new file mode 100644 index 0000000..bff85ad --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__longctx32768__rpc.log @@ -0,0 +1,19 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:724: Remote RPC server crashed or returned malformed response +/lib64/libggml-base.so.0(+0x35a5) [0x7f92f39eb5a5] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f92f39eb96b] +/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f92f39ebaef] +/lib64/libggml-rpc.so.0(+0x5b4a) [0x7f92f7010b4a] +/lib64/libggml-base.so.0(+0x171b2) [0x7f92f39ff1b2] +/lib64/libggml-base.so.0(+0x1749f) [0x7f92f39ff49f] +/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0x19) [0x7f92f3a00509] +/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3c61) [0x7f92f72603c1] +/lib64/libllama.so.0(+0x25568) [0x7f92f71b6568] +/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f92f71b73cc] +/usr/sbin/llama-bench() [0x4077b5] +/lib64/libc.so.6(+0x35b5) [0x7f92f33815b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f92f3381668] +/usr/sbin/llama-bench() [0x409cf5] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__rpc.log new file mode 100644 index 0000000..6f7a3a0 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__rpc.log @@ -0,0 +1,19 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:724: Remote RPC server crashed or returned malformed response +/lib64/libggml-base.so.0(+0x35a5) [0x7f4efadba5a5] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4efadba96b] +/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f4efadbaaef] +/lib64/libggml-rpc.so.0(+0x5b4a) [0x7f4efe3dfb4a] +/lib64/libggml-base.so.0(+0x171b2) [0x7f4efadce1b2] +/lib64/libggml-base.so.0(+0x1749f) [0x7f4efadce49f] +/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0x19) [0x7f4efadcf509] +/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3c61) [0x7f4efe62f3c1] +/lib64/libllama.so.0(+0x25568) [0x7f4efe585568] +/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f4efe5863cc] +/usr/sbin/llama-bench() [0x4077b5] +/lib64/libc.so.6(+0x35b5) [0x7f4efa7505b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4efa750668] +/usr/sbin/llama-bench() [0x409cf5] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__longctx32768__rpc.log new file mode 100644 index 0000000..13c4edf --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__longctx32768__rpc.log @@ -0,0 +1 @@ +Error: unable to find user kyuz0: no matching entries in passwd file diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__rpc.log new file mode 100644 index 0000000..13c4edf --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__rpc.log @@ -0,0 +1 @@ +Error: unable to find user kyuz0: no matching entries in passwd file diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..ed18edd --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 59.80 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.45 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log new file mode 100644 index 0000000..7578042 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 172.78 ± 2.43 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.17 ± 0.05 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log new file mode 100644 index 0000000..3ea4755 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 59.95 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.45 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log new file mode 100644 index 0000000..1978468 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.98 ± 1.76 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.17 ± 0.04 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..d67ecf9 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 60.12 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.04 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__rpc.log new file mode 100644 index 0000000..733c3da --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 157.51 ± 1.13 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.24 ± 0.10 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__longctx32768__rpc.log new file mode 100644 index 0000000..2b480f3 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 60.47 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.04 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__rpc.log new file mode 100644 index 0000000..6d1ea52 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 162.36 ± 1.16 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.23 ± 0.08 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..357c875 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 62.07 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.40 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__rpc.log new file mode 100644 index 0000000..3825c61 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 160.68 ± 0.44 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.97 ± 0.02 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__longctx32768__rpc.log new file mode 100644 index 0000000..c3bf1b7 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__longctx32768__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 61.38 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.40 ± 0.00 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__rpc.log new file mode 100644 index 0000000..637178a --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__rpc.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 159.35 ± 0.53 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.98 ± 0.03 | + +build: 2656c0d26 (7693) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__longctx32768__rpc.log new file mode 100644 index 0000000..ce783c3 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__longctx32768__rpc.log @@ -0,0 +1,3 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +Failed to connect to 10.0.0.1:50052 diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log new file mode 100644 index 0000000..264c27f --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log @@ -0,0 +1,19 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:724: Remote RPC server crashed or returned malformed response +/lib64/libggml-base.so.0(+0x35a5) [0x7f7c046f25a5] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f7c046f296b] +/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f7c046f2aef] +/lib64/libggml-rpc.so.0(+0x5b4a) [0x7f7c07d17b4a] +/lib64/libggml-base.so.0(+0x171b2) [0x7f7c047061b2] +/lib64/libggml-base.so.0(+0x1749f) [0x7f7c0470649f] +/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0x19) [0x7f7c04707509] +/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3c61) [0x7f7c07f673c1] +/lib64/libllama.so.0(+0x25568) [0x7f7c07ebd568] +/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f7c07ebe3cc] +/usr/sbin/llama-bench() [0x4077b5] +/lib64/libc.so.6(+0x35b5) [0x7f7c040885b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7c04088668] +/usr/sbin/llama-bench() [0x409cf5] diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__longctx32768__rpc.log new file mode 100644 index 0000000..381ed94 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__longctx32768__rpc.log @@ -0,0 +1,19 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +Failed to connect to 10.0.0.1:50052 +radv/amdgpu: Failed to allocate a buffer: +radv/amdgpu: size : 990904320 bytes +radv/amdgpu: alignment : 262144 bytes +radv/amdgpu: domains : 4 +radv/amdgpu: Failed to allocate a buffer: +radv/amdgpu: size : 990904320 bytes +radv/amdgpu: alignment : 262144 bytes +radv/amdgpu: domains : 4 +radv/amdgpu: Failed to allocate a buffer: +radv/amdgpu: size : 990904320 bytes +radv/amdgpu: alignment : 262144 bytes +radv/amdgpu: domains : 4 +radv/amdgpu: Failed to allocate a buffer: +radv/amdgpu: size : 990904320 bytes +radv/amdgpu: alignment : 262144 bytes +radv/amdgpu: domains : 4 diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__rpc.log new file mode 100644 index 0000000..f1f44b3 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__rpc.log @@ -0,0 +1,19 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:724: Remote RPC server crashed or returned malformed response +/lib64/libggml-base.so.0(+0x35a5) [0x7fe6965fe5a5] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe6965fe96b] +/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe6965feaef] +/lib64/libggml-rpc.so.0(+0x5b4a) [0x7fe699c23b4a] +/lib64/libggml-base.so.0(+0x171b2) [0x7fe6966121b2] +/lib64/libggml-base.so.0(+0x1749f) [0x7fe69661249f] +/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0x19) [0x7fe696613509] +/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3c61) [0x7fe699e733c1] +/lib64/libllama.so.0(+0x25568) [0x7fe699dc9568] +/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7fe699dca3cc] +/usr/sbin/llama-bench() [0x4077b5] +/lib64/libc.so.6(+0x35b5) [0x7fe695f945b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe695f94668] +/usr/sbin/llama-bench() [0x409cf5] diff --git a/docs/results.json b/docs/results.json index c8c6275..d36f20e 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-01-12T09:25:21Z", + "generated_at": "2026-01-12T13:18:12Z", "system_info": { "distro": "Fedora Linux 42 (Workstation Edition)", "kernel": "6.18.3-100.fc42.x86_64", @@ -8,6 +8,10 @@ "timestamp": "09 Jan 2026" }, "llamacpp_builds": [ + { + "hash": "2656c0d26", + "number": "7693" + }, { "hash": "9c142e3a2", "number": "7670" @@ -15221,6 +15225,1550 @@ "hash": "9c142e3a2", "number": "7670" } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 4.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 101.82, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 4.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 95.55, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 103.11, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.11, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 87.98, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.1, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7-nightlies__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 102.58, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 93.44, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1_1__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__longctx32768__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_amdvlk__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__longctx32768__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__vulkan_radv__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 59.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 172.78, + "tps_std": 2.43, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.17, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 59.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 173.98, + "tps_std": 1.76, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.17, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 60.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 157.51, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.24, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 60.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 162.36, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.23, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7-nightlies__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 62.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 160.68, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.97, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 61.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 159.35, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.98, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1_1__rpc.log", + "rpc": true, + "build": { + "hash": "2656c0d26", + "number": "7693" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__longctx32768__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__longctx32768__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_radv__rpc.log", + "rpc": true, + "build": null } ] } \ No newline at end of file