diff --git a/toolboxes/Dockerfile.rocm-6.4.4 b/toolboxes/Dockerfile.rocm-6.4.4 index f23f256..c15f2cb 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4 +++ b/toolboxes/Dockerfile.rocm-6.4.4 @@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False \ install \ make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ rocminfo radeontop 'rocm-*' 'rocblas-*' hipblas 'hipblas-*' \ - git vim rsync sudo tar xz \ + git vim rsync sudo tar xz patch \ && dnf clean all && rm -rf /var/cache/dnf/* # llama.cpp @@ -15,9 +15,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +COPY llama-grammar.patch /tmp/llama-grammar.patch + # build + install RUN git clean -xdf \ && git submodule update --recursive \ + && patch -p1 < /tmp/llama-grammar.patch \ && HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ cmake -S . -B build \ -DGGML_HIP=ON \ diff --git a/toolboxes/Dockerfile.rocm-7.2 b/toolboxes/Dockerfile.rocm-7.2 index 097b9ca..b92fc6c 100644 --- a/toolboxes/Dockerfile.rocm-7.2 +++ b/toolboxes/Dockerfile.rocm-7.2 @@ -22,7 +22,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False \ rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \ rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \ rocminfo radeontop \ - git-core vim sudo rsync \ + git-core vim sudo rsync patch \ && dnf clean all && rm -rf /var/cache/dnf/* # rocm env @@ -38,9 +38,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +COPY llama-grammar.patch /tmp/llama-grammar.patch + # build RUN git clean -xdf \ && git submodule update --recursive \ + && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . 
-B build \ -DGGML_HIP=ON \ -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -mllvm --amdgpu-unroll-threshold-local=600" \ diff --git a/toolboxes/Dockerfile.rocm7-nightlies b/toolboxes/Dockerfile.rocm7-nightlies index f86a22a..922c871 100644 --- a/toolboxes/Dockerfile.rocm7-nightlies +++ b/toolboxes/Dockerfile.rocm7-nightlies @@ -59,9 +59,10 @@ RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . \ && git clean -xdf \ && git submodule update --recursive +COPY llama-grammar.patch /tmp/llama-grammar.patch - -RUN cmake -S . -B build \ +RUN patch -p1 < /tmp/llama-grammar.patch \ + && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/toolboxes/Dockerfile.vulkan-amdvlk b/toolboxes/Dockerfile.vulkan-amdvlk index ebf78a2..ab72c79 100644 --- a/toolboxes/Dockerfile.vulkan-amdvlk +++ b/toolboxes/Dockerfile.vulkan-amdvlk @@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ git vim \ make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \ vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \ - radeontop glslc wget \ + radeontop glslc wget patch \ && dnf clean all && rm -rf /var/cache/dnf/* # amdvlk @@ -21,9 +21,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +COPY llama-grammar.patch /tmp/llama-grammar.patch + # build RUN git clean -xdf \ && git submodule update --recursive \ + && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . 
-B build -G Ninja \ -DGGML_VULKAN=ON \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/toolboxes/Dockerfile.vulkan-radv b/toolboxes/Dockerfile.vulkan-radv index 60d2295..d3dd1fd 100644 --- a/toolboxes/Dockerfile.vulkan-radv +++ b/toolboxes/Dockerfile.vulkan-radv @@ -6,7 +6,7 @@ RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ git vim \ make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \ vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \ - radeontop glslc \ + radeontop glslc patch \ && dnf clean all && rm -rf /var/cache/dnf/* # llama.cpp @@ -15,9 +15,12 @@ ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +COPY llama-grammar.patch /tmp/llama-grammar.patch + # build RUN git clean -xdf \ && git submodule update --recursive \ + && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . -B build -G Ninja \ -DGGML_VULKAN=ON \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/toolboxes/llama-grammar.patch b/toolboxes/llama-grammar.patch new file mode 100644 index 0000000..ce6b9c3 --- /dev/null +++ b/toolboxes/llama-grammar.patch @@ -0,0 +1,13 @@ +From: KYmidnight +Issue: https://github.com/kyuz0/amd-strix-halo-toolboxes/issues/70 +Subject: Increase MAX_REPETITION_THRESHOLD for complex tool schemas + +--- a/src/llama-grammar.cpp ++++ b/src/llama-grammar.cpp +@@ -14,5 +14,5 @@ + +// allow to be at least defined in CMakeLists.txt + #ifndef MAX_REPETITION_THRESHOLD +-#define MAX_REPETITION_THRESHOLD 2000 ++#define MAX_REPETITION_THRESHOLD 100000 + #endif