From a58d133c5ecfa414c738b70bb9e89e941a85f238 Mon Sep 17 00:00:00 2001
From: Donato Capitella <donato.capitella@reversec.com>
Date: Tue, 7 Apr 2026 17:49:16 +0100
Subject: [PATCH] chore: update llama.cpp patch to PR 21566 for gemma-4
 inference fix

---
 toolboxes/Dockerfile.rocm7-nightlies | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/toolboxes/Dockerfile.rocm7-nightlies b/toolboxes/Dockerfile.rocm7-nightlies
index 430334f..5b6d427 100644
--- a/toolboxes/Dockerfile.rocm7-nightlies
+++ b/toolboxes/Dockerfile.rocm7-nightlies
@@ -61,8 +61,9 @@ RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . \
 
 COPY llama-grammar.patch /tmp/llama-grammar.patch
 
+# Upstream llama.cpp PR 21566 ("CUDA: check for buffer overlap before fusing"): trial fix for the gemma-4 inference bug, applied after the local grammar patch
 RUN patch -p1 < /tmp/llama-grammar.patch \
-  && curl -sSL https://github.com/ggml-org/llama.cpp/pull/21506.patch | patch -p1 \
+  && curl -sSL https://github.com/ggml-org/llama.cpp/pull/21566.patch | patch -p1 \
   && cmake -S . -B build \
   -DGGML_HIP=ON \
   -DAMDGPU_TARGETS=gfx1151 \