patch: increase MAX_REPETITION_THRESHOLD from 2000 to 100000 to allow complex agentic workflows

This commit is contained in:
Donato Capitella
2026-03-25 09:23:03 +00:00
parent eb03432a50
commit ca84f4cbf3
6 changed files with 32 additions and 6 deletions
+4 -1
View File
@@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False \
install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
rocminfo radeontop 'rocm-*' 'rocblas-*' hipblas 'hipblas-*' \
git vim rsync sudo tar xz \
git vim rsync sudo tar xz patch \
&& dnf clean all && rm -rf /var/cache/dnf/*
# llama.cpp
@@ -15,9 +15,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git
ARG BRANCH=master
RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} .
COPY llama-grammar.patch /tmp/llama-grammar.patch
# build + install
RUN git clean -xdf \
&& git submodule update --recursive \
&& patch -p1 < /tmp/llama-grammar.patch \
&& HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build \
-DGGML_HIP=ON \
+4 -1
View File
@@ -22,7 +22,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False \
rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \
rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \
rocminfo radeontop \
git-core vim sudo rsync \
git-core vim sudo rsync patch \
&& dnf clean all && rm -rf /var/cache/dnf/*
# rocm env
@@ -38,9 +38,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git
ARG BRANCH=master
RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} .
COPY llama-grammar.patch /tmp/llama-grammar.patch
# build
RUN git clean -xdf \
&& git submodule update --recursive \
&& patch -p1 < /tmp/llama-grammar.patch \
&& cmake -S . -B build \
-DGGML_HIP=ON \
-DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -mllvm --amdgpu-unroll-threshold-local=600" \
+3 -2
View File
@@ -59,9 +59,10 @@ RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . \
&& git clean -xdf \
&& git submodule update --recursive
COPY llama-grammar.patch /tmp/llama-grammar.patch
RUN cmake -S . -B build \
RUN patch -p1 < /tmp/llama-grammar.patch \
&& cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
-DCMAKE_BUILD_TYPE=Release \
+4 -1
View File
@@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
git vim \
make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \
vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \
radeontop glslc wget \
radeontop glslc wget patch \
&& dnf clean all && rm -rf /var/cache/dnf/*
# amdvlk
@@ -21,9 +21,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git
ARG BRANCH=master
RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} .
COPY llama-grammar.patch /tmp/llama-grammar.patch
# build
RUN git clean -xdf \
&& git submodule update --recursive \
&& patch -p1 < /tmp/llama-grammar.patch \
&& cmake -S . -B build -G Ninja \
-DGGML_VULKAN=ON \
-DCMAKE_BUILD_TYPE=Release \
+4 -1
View File
@@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
git vim \
make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \
vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \
radeontop glslc \
radeontop glslc patch \
&& dnf clean all && rm -rf /var/cache/dnf/*
# llama.cpp
@@ -15,9 +15,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git
ARG BRANCH=master
RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} .
COPY llama-grammar.patch /tmp/llama-grammar.patch
# build
RUN git clean -xdf \
&& git submodule update --recursive \
&& patch -p1 < /tmp/llama-grammar.patch \
&& cmake -S . -B build -G Ninja \
-DGGML_VULKAN=ON \
-DCMAKE_BUILD_TYPE=Release \
+13
View File
@@ -0,0 +1,13 @@
From: KYmidnight
Issue: https://github.com/kyuz0/amd-strix-halo-toolboxes/issues/70
Subject: Increase MAX_REPETITION_THRESHOLD for complex tool schemas
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -14,7 +14,7 @@
// allow to be at least defined in CMakeLists.txt
#ifndef MAX_REPETITION_THRESHOLD
-#define MAX_REPETITION_THRESHOLD 2000
+#define MAX_REPETITION_THRESHOLD 100000
#endif