From c7f4ffc34669501353455360a72208ff64827987 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 19 Nov 2025 07:35:56 +0000 Subject: [PATCH] updated rpc benchmakrs with long context --- ..._4_4-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...__rocm6_4_4-rocwmma__longctx32768__rpc.log | 10 + ...5__rocm6_4_4__hblt0__longctx32768__rpc.log | 10 + ...of-00005__rocm6_4_4__longctx32768__rpc.log | 10 + ...m7_1-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...05__rocm7_1-rocwmma__longctx32768__rpc.log | 10 + ...005__rocm7_1__hblt0__longctx32768__rpc.log | 36 + ...1-of-00005__rocm7_1__longctx32768__rpc.log | 21 + ...mma-improved__hblt0__longctx32768__rpc.log | 10 + ...ha-rocwmma-improved__longctx32768__rpc.log | 10 + ...lpha-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...rocm7_alpha-rocwmma__longctx32768__rpc.log | 10 + ..._rocm7_alpha__hblt0__longctx32768__rpc.log | 32 + ...-00005__rocm7_alpha__longctx32768__rpc.log | 10 + ...7_rc-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...5__rocm7_rc-rocwmma__longctx32768__rpc.log | 10 + ...05__rocm7_rc__hblt0__longctx32768__rpc.log | 21 + ...-of-00005__rocm7_rc__longctx32768__rpc.log | 21 + ..._4_4-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...__rocm6_4_4-rocwmma__longctx32768__rpc.log | 10 + ...4__rocm6_4_4__hblt0__longctx32768__rpc.log | 10 + ...of-00004__rocm6_4_4__longctx32768__rpc.log | 10 + ...m7_1-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...04__rocm7_1-rocwmma__longctx32768__rpc.log | 10 + ...004__rocm7_1__hblt0__longctx32768__rpc.log | 10 + ...1-of-00004__rocm7_1__longctx32768__rpc.log | 10 + ...mma-improved__hblt0__longctx32768__rpc.log | 10 + ...ha-rocwmma-improved__longctx32768__rpc.log | 10 + ...lpha-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...rocm7_alpha-rocwmma__longctx32768__rpc.log | 10 + ..._rocm7_alpha__hblt0__longctx32768__rpc.log | 10 + ...-00004__rocm7_alpha__longctx32768__rpc.log | 10 + ...7_rc-rocwmma__hblt0__longctx32768__rpc.log | 10 + ...4__rocm7_rc-rocwmma__longctx32768__rpc.log | 10 + ...04__rocm7_rc__hblt0__longctx32768__rpc.log | 10 + ...-of-00004__rocm7_rc__longctx32768__rpc.log | 21 + docs/results.json | 1832 ++++++++++++++++- 37 files changed, 2283 insertions(+), 1 deletion(-) create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log create mode 100644 benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..ed4112d --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.54 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..4a45a6c --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.03 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..542c742 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.47 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.84 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log new file mode 100644 index 0000000..de38e35 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.99 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.82 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..f57a31e --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 8.59 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..4221078 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 8.11 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..0688c57 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log @@ -0,0 +1,36 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +:0:rocdevice.cpp :3580: 143267106124 us: Callback: Queue 0x7f2f8a400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +Kernel Name: _ZL15flash_attn_tileILi128ELi128ELi16ELi4ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil +VGPU=0xe824a70 SWq=0x7f308c9e2000, HWq=0x7f2f8a400000, id=2 + Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 + grid=[4096, 8, 24], workgroup=[32, 8, 1] + private_seg_size=0, group_seg_size=33792 + kernel_obj=0x7f11bd42f700, kernarg_address=0x0x7f2f8a201e80 + completion_signal=0x0, correlation_id=0 + rptr=23, wptr=24 + /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f30cc2c5565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f30cc2c592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f30cc2c5aaf] +/usr/local/lib64/libggml-hip.so.0(+0x294e092) [0x7f30cecd0092] +/usr/local/lib64/libggml-hip.so.0(+0x295f107) [0x7f30cece1107] +/usr/local/lib64/libggml-hip.so.0(+0x295d9dd) [0x7f30cecdf9dd] +/usr/local/lib64/libggml-hip.so.0(+0x295c95d) [0x7f30cecde95d] +/usr/local/lib64/libggml-hip.so.0(+0x29575c7) [0x7f30cecd95c7] +/usr/local/lib64/libggml-hip.so.0(+0x29540ea) [0x7f30cecd60ea] +/usr/local/lib64/libggml-hip.so.0(+0x295319f) [0x7f30cecd519f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f30cc2dfde3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f30cf378650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f30cf37a2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f30cf37f1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f30cf38000e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f30cbc5b5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f30cbc5b668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log new file mode 100644 index 0000000..52ece0c --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log @@ -0,0 +1,21 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f15cac25565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f15cac2592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f15cac25aaf] +/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7f15cacd3195] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f15cac3fde3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f15cdcd8650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f15cdcda2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f15cdcdf1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f15cdce000e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f15ca5bb5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f15ca5bb668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..5406485 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 10.48 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log new file mode 100644 index 0000000..8dda401 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 9.05 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..1ee3e8d --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.98 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.66 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..17d0ba7 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.61 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.66 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..5d53db1 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log @@ -0,0 +1,32 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +:0:rocdevice.cpp :3588: 155339106534 us: Callback: Queue 0x7f48ed200000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +Kernel Name: _ZL15flash_attn_tileILi128ELi128ELi16ELi4ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil +VGPU=0x1eb0d870 SWq=0x7f48efd6a000, HWq=0x7f48ed200000, id=2 + Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 + grid=[4096, 8, 24], workgroup=[32, 8, 1] + private_seg_size=0, group_seg_size=33792 + kernel_obj=0x7f48ed02f700, kernarg_address=0x0x7f48ec41e800 + completion_signal=0x0, correlation_id=0 + rptr=5346, wptr=5361 + /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f48fc619565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f48fc61992b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f48fc619aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f48fef92f12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f48fef9aa66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f48fef97fcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f48fc633de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f48ff666650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f48ff6682e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f48ff66d1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f48ff66e00e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f48fbfaf5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f48fbfaf668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log new file mode 100644 index 0000000..c599711 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.79 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..9cdd3d9 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 8.70 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..7cc9768 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 8.13 ± 0.00 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.81 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..b1b7fb2 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log @@ -0,0 +1,21 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7faca1d2c565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7faca1d2c92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7faca1d2caaf] +/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7faca1dda195] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7faca1d46de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7faca4ddf650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7faca4de12e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7faca4de61bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7faca4de700e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7faca16c25b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7faca16c2668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log new file mode 100644 index 0000000..ccb3cd5 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log @@ -0,0 +1,21 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fb39df29565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb39df2992b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb39df29aaf] +/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7fb39dfd7195] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fb39df43de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb3a0fdc650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fb3a0fde2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fb3a0fe31bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fb3a0fe400e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7fb39d8bf5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb39d8bf668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..26f8825 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 20.68 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.06 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..e076a46 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 20.85 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.07 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..3152c83 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 56.85 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.68 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log new file mode 100644 index 0000000..707a73d --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 55.76 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.75 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..4c41f7e --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.02 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.07 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..74bffd0 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 28.97 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.06 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..db0389d --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 61.44 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.68 ± 0.00 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log new file mode 100644 index 0000000..6710d33 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 59.64 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.66 ± 0.00 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..393c0c1 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.46 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.66 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log new file mode 100644 index 0000000..95f2e42 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.62 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..60d9930 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.40 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.84 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..a65306e --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.47 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.84 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..1ba09fd --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 55.05 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log new file mode 100644 index 0000000..6558c79 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 55.38 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.70 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..c400d98 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.15 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.07 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log new file mode 100644 index 0000000..be4564b --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.32 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.07 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log new file mode 100644 index 0000000..750fb19 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 57.81 ± 0.00 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.67 ± 0.00 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log new file mode 100644 index 0000000..fd4e0d7 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log @@ -0,0 +1,21 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a02e06565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a02e0692b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a02e06aaf] +/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7f1a02eb4195] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a02e20de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a05eb9650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a05ebb2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a05ec01bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a05ec100e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x408087] +/lib64/libc.so.6(+0x35b5) [0x7f1a0279c5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a0279c668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/docs/results.json b/docs/results.json index 867c25f..72568e0 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2025-11-17T23:08:12Z", + "generated_at": "2025-11-19T07:33:18Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { @@ -40628,6 +40628,62 @@ "number": "7034" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40684,6 +40740,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40740,6 +40852,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40796,6 +40964,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 13.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40852,6 +41076,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 8.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40908,6 +41188,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 8.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -40964,6 +41300,31 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41020,6 +41381,31 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41076,6 +41462,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 10.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41132,6 +41574,62 @@ "number": "7074" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 9.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41188,6 +41686,62 @@ "number": "7074" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41244,6 +41798,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41300,6 +41910,31 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41325,6 +41960,62 @@ "rpc": true, "build": null }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 12.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41381,6 +42072,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 8.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41437,6 +42184,62 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 8.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41493,6 +42296,31 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41549,6 +42377,31 @@ "number": "7085" } }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", @@ -41605,6 +42458,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 20.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41661,6 +42570,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 20.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41717,6 +42682,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 56.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41773,6 +42794,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 55.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41829,6 +42906,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41885,6 +43018,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 28.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41941,6 +43130,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 61.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -41997,6 +43242,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 59.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42053,6 +43354,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42109,6 +43466,62 @@ "number": "7074" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42165,6 +43578,62 @@ "number": "7074" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42221,6 +43690,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42277,6 +43802,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 55.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42333,6 +43914,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 55.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42389,6 +44026,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42445,6 +44138,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42501,6 +44250,62 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 57.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", @@ -42557,6 +44362,31 @@ "number": "7085" } }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log", + "rpc": true, + "build": null + }, { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL",