From a58d133c5ecfa414c738b70bb9e89e941a85f238 Mon Sep 17 00:00:00 2001
From: Donato Capitella
Date: Tue, 7 Apr 2026 17:49:16 +0100
Subject: [PATCH] chore: update llama.cpp patch to PR 21566 for gemma-4 inference fix

---
Reviewer note: this patch was recovered from a copy whose newlines had been
collapsed onto one line. The line breaks, the two blank context lines and the
indentation of the continuation lines below are reconstructed so that the hunk
matches its header (8 old lines, 9 new lines). Confirm them against
toolboxes/Dockerfile.rocm7-nightlies before applying; if only the indentation
differs, `git apply --ignore-whitespace` will still accept the hunk.

 toolboxes/Dockerfile.rocm7-nightlies | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/toolboxes/Dockerfile.rocm7-nightlies b/toolboxes/Dockerfile.rocm7-nightlies
index 430334f..5b6d427 100644
--- a/toolboxes/Dockerfile.rocm7-nightlies
+++ b/toolboxes/Dockerfile.rocm7-nightlies
@@ -61,8 +61,9 @@ RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . \
 
 COPY llama-grammar.patch /tmp/llama-grammar.patch
 
+# PR 21566: test a fix for gemma-4 inference bug (CUDA: check for buffer overlap before fusing)
 RUN patch -p1 < /tmp/llama-grammar.patch \
-    && curl -sSL https://github.com/ggml-org/llama.cpp/pull/21506.patch | patch -p1 \
+    && curl -sSL https://github.com/ggml-org/llama.cpp/pull/21566.patch | patch -p1 \
     && cmake -S . -B build \
       -DGGML_HIP=ON \
       -DAMDGPU_TARGETS=gfx1151 \