From 4ec72fa8f49ae040c65cfaf2bb0c83f801bd93ee Mon Sep 17 00:00:00 2001 From: "S. Neuhaus" Date: Thu, 30 Oct 2025 18:11:28 +0100 Subject: [PATCH 01/21] Fix command syntax for llama-cli usage --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 97769b1..b5ae284 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ Once inside, the following commands show how to run local LLMs: * `llama-cli --list-devices` *Lists available GPU devices for Llama.cpp.* -* `llama-cli --no-mmap -ngl 999 -fa -m ` +* `llama-cli --no-mmap -ngl 999 -fa 1 -m ` *Runs inference on the specified model, with all layers on GPU and flash attention enabled (replace \*\* with your model path).* ## 2.3 Downloading GGUF Models from HuggingFace From 8b2cc6a0cc09275b4fe056ff9331d2ceac1b95cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Sun, 19 Oct 2025 23:11:43 +0200 Subject: [PATCH 02/21] feat: rocm-7alpha --- .github/workflows/build_and_publish.yml | 2 +- .github/workflows/prune-old-toolboxes.yml | 2 +- toolboxes/Dockerfile.rocm-7alpha | 124 ++++++++++++++++++++ toolboxes/Dockerfile.rocm-7alpha-rocwmma | 131 ++++++++++++++++++++++ 4 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 toolboxes/Dockerfile.rocm-7alpha create mode 100644 toolboxes/Dockerfile.rocm-7alpha-rocwmma diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index cb8e380..7c789fa 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -28,7 +28,7 @@ jobs: IN='${{ inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' else # Remove spaces and build JSON array from comma list IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') diff --git a/.github/workflows/prune-old-toolboxes.yml b/.github/workflows/prune-old-toolboxes.yml index e3d0b33..43d71dd 100644 --- a/.github/workflows/prune-old-toolboxes.yml +++ b/.github/workflows/prune-old-toolboxes.yml @@ -44,7 +44,7 @@ jobs: run: | IN='${{ github.event.inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' else IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') JSON='["'${IN_CLEAN//,/\",\"}'"]' diff --git a/toolboxes/Dockerfile.rocm-7alpha b/toolboxes/Dockerfile.rocm-7alpha new file mode 100644 index 0000000..b6bd81e --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha @@ -0,0 +1,124 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_HIP_UMA=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# keep bin; drop headers/docs/static libs; drop source tree +RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ + && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ + /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ + && rm -rf /opt/llama.cpp + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma b/toolboxes/Dockerfile.rocm-7alpha-rocwmma new file mode 100644 index 0000000..f99c8d6 --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma @@ -0,0 +1,131 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt +COPY ./build-rocwmma.sh . +RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive +COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh +RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_HIP_UMA=ON \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# keep bin; drop headers/docs/static libs; drop source tree +RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ + && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ + /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ + && rm -rf /opt/llama.cpp + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] From 12fcfc54bac52076aac6593c761335779f579b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Wed, 29 Oct 2025 09:10:42 +0100 Subject: [PATCH 03/21] feat: rocm-7alpha-rocwmma-improved --- .../Dockerfile.rocm-7alpha-rocwmma-improved | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved new file mode 100644 index 0000000..49bd48d --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved @@ -0,0 +1,137 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt +COPY ./build-rocwmma.sh . +RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive +COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh +RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp + +# Download and apply the PR 16827 patch +RUN aria2c -x 16 -s 16 -j 16 --file-allocation=none \ + "https://github.com/ggml-org/llama.cpp/pull/16827.patch?full_index=1" \ + -o 16827.patch +RUN git apply 16827.patch + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_HIP_UMA=ON \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# keep bin; drop headers/docs/static libs; drop source tree +RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ + && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ + /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ + && rm -rf /opt/llama.cpp + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] From 2e9ea4da70cb93ce61aad1c0e1cc3359c9c57e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Wed, 29 Oct 2025 11:25:15 +0100 Subject: [PATCH 04/21] fix: use rocm-wmma-tune directly --- toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved index 49bd48d..9f0b64e 100644 --- a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved @@ -58,18 +58,12 @@ COPY ./build-rocwmma.sh . RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp -RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ +RUN git clone --recursive -b rocm-wmma-tune https://github.com/lhl/llama.cpp.git . \ && git clean -xdf \ && git submodule update --recursive COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp -# Download and apply the PR 16827 patch -RUN aria2c -x 16 -s 16 -j 16 --file-allocation=none \ - "https://github.com/ggml-org/llama.cpp/pull/16827.patch?full_index=1" \ - -o 16827.patch -RUN git apply 16827.patch - RUN cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ From dd3ef613269a08926a21275e26c4ce2dd5350a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Sun, 9 Nov 2025 16:00:51 +0100 Subject: [PATCH 05/21] feat: align Dockerfile.rocm-7alpha-rocwmma to latest Dockerfile.rocm-7rc-rocwmma --- toolboxes/Dockerfile.rocm-7alpha-rocwmma | 9 ++------- toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma b/toolboxes/Dockerfile.rocm-7alpha-rocwmma index f99c8d6..3f3a795 100644 --- a/toolboxes/Dockerfile.rocm-7alpha-rocwmma +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma @@ -68,17 +68,11 @@ RUN cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ -DCMAKE_BUILD_TYPE=Release \ - -DLLAMA_HIP_UMA=ON \ + -DGGML_RPC=ON \ -DGGML_HIP_ROCWMMA_FATTN=ON \ && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release -# keep bin; drop headers/docs/static libs; drop source tree -RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ - && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ - /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ - && rm -rf /opt/llama.cpp - # runtime FROM registry.fedoraproject.org/fedora-minimal:rawhide @@ -88,6 +82,7 @@ RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 COPY --from=builder /usr/local/ /usr/local/ +COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ COPY gguf-vram-estimator.py /usr/local/bin/ RUN chmod +x /usr/local/bin/gguf-vram-estimator.py diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved index 9f0b64e..bc57b30 100644 --- a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved @@ -68,17 +68,11 @@ RUN cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ -DCMAKE_BUILD_TYPE=Release \ - -DLLAMA_HIP_UMA=ON \ + -DGGML_RPC=ON \ -DGGML_HIP_ROCWMMA_FATTN=ON \ && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release -# keep bin; drop headers/docs/static libs; drop source tree -RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ - && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ - /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ - && rm -rf /opt/llama.cpp - # runtime FROM registry.fedoraproject.org/fedora-minimal:rawhide @@ -88,6 +82,7 @@ RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 COPY --from=builder /usr/local/ /usr/local/ +COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ COPY gguf-vram-estimator.py /usr/local/bin/ RUN chmod +x /usr/local/bin/gguf-vram-estimator.py From 5e973c69d9c9ab13b65d7e0e138b4657783258ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Sun, 9 Nov 2025 16:05:48 +0100 Subject: [PATCH 06/21] feat: use hjc4869 llama.cpp port for Dockerfile.rocm-7alpha-rocwmma-improved --- toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved index bc57b30..b34dbf2 100644 --- a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved @@ -58,7 +58,7 @@ COPY ./build-rocwmma.sh . RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp -RUN git clone --recursive -b rocm-wmma-tune https://github.com/lhl/llama.cpp.git . \ +RUN git clone --recursive https://github.com/hjc4869/llama.cpp.git . \ && git clean -xdf \ && git submodule update --recursive COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh From 6d121bc88ad2542346eaff576c12d47944e560f4 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Mon, 10 Nov 2025 19:21:21 +0000 Subject: [PATCH 07/21] Merge PR#15405 to make RPC server faster --- toolboxes/Dockerfile.rocm-7rc-rocwmma | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/toolboxes/Dockerfile.rocm-7rc-rocwmma b/toolboxes/Dockerfile.rocm-7rc-rocwmma index 7b6cb8b..767d427 100644 --- a/toolboxes/Dockerfile.rocm-7rc-rocwmma +++ b/toolboxes/Dockerfile.rocm-7rc-rocwmma @@ -59,7 +59,10 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ && git clean -xdf \ - && git submodule update --recursive + && git submodule update --recursive \ + && git fetch origin pull/15405/head:pr-15405 \ + && git merge --no-edit pr-15405 + COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp From 73f6a69310ef9bda860285907f81582b540e1dac Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Mon, 10 Nov 2025 19:29:30 +0000 Subject: [PATCH 08/21] fix --- toolboxes/Dockerfile.rocm-7rc-rocwmma | 2 ++ 1 file changed, 2 insertions(+) diff --git a/toolboxes/Dockerfile.rocm-7rc-rocwmma b/toolboxes/Dockerfile.rocm-7rc-rocwmma index 767d427..15b91fd 100644 --- a/toolboxes/Dockerfile.rocm-7rc-rocwmma +++ b/toolboxes/Dockerfile.rocm-7rc-rocwmma @@ -60,6 +60,8 @@ WORKDIR /opt/llama.cpp RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ && git clean -xdf \ && git submodule update --recursive \ + && git config user.email "builder@localhost" \ + && git config user.name "Container Builder" \ && git fetch origin pull/15405/head:pr-15405 \ && git merge --no-edit pr-15405 From abeec3526c34891152b01368d4b6ba0111ff725b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Wed, 12 Nov 2025 08:35:11 +0100 Subject: [PATCH 09/21] fix: rocm-7alpha pipelines and refresh-toolboxes.sh --- .github/workflows/build_and_publish.yml | 2 +- .github/workflows/prune-old-toolboxes.yml | 2 +- refresh-toolboxes.sh | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 7c789fa..9225a9a 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -28,7 +28,7 @@ jobs: IN='${{ inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' else # Remove spaces and build JSON array from comma list IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') diff --git a/.github/workflows/prune-old-toolboxes.yml b/.github/workflows/prune-old-toolboxes.yml index 43d71dd..dcf6637 100644 --- a/.github/workflows/prune-old-toolboxes.yml +++ b/.github/workflows/prune-old-toolboxes.yml @@ -44,7 +44,7 @@ jobs: run: | IN='${{ github.event.inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' else IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') JSON='["'${IN_CLEAN//,/\",\"}'"]' diff --git a/refresh-toolboxes.sh b/refresh-toolboxes.sh index 9a4dbdd..6f69d9e 100755 --- a/refresh-toolboxes.sh +++ b/refresh-toolboxes.sh @@ -11,6 +11,9 @@ TOOLBOXES["llama-rocm-6.4.4"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4 TOOLBOXES["llama-rocm-6.4.4-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma-improved"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" function usage() { echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]" From 32f7667dcabb566e5ab5ac713cd018c0b133bc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niccol=C3=B2=20Belli?= Date: Wed, 12 Nov 2025 08:39:48 +0100 Subject: [PATCH 10/21] fix: typo in refresh-toolboxes.sh --- refresh-toolboxes.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refresh-toolboxes.sh b/refresh-toolboxes.sh index 6f69d9e..770ac76 100755 --- a/refresh-toolboxes.sh +++ b/refresh-toolboxes.sh @@ -11,9 +11,9 @@ TOOLBOXES["llama-rocm-6.4.4"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4 TOOLBOXES["llama-rocm-6.4.4-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" -TOOLBOXES["llama-rocm-7alpha"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" -TOOLBOXES["llama-rocm-7alpha-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" -TOOLBOXES["llama-rocm-7alpha-rocwmma-improved"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma-improved"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma-improved --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" function usage() { echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]" From 52ee9d50f2b3b57f6fa6f79f92d5bc6d7d609fd9 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 07:46:55 +0000 Subject: [PATCH 11/21] fix rocm-6.4.4-rocwmma --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index cb44de3..54628ef 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -49,9 +49,8 @@ RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.c RUN git clean -xdf \ && git pull \ && git submodule update --recursive \ - && echo -e '#ifndef HIP_HAS_SHFL_SYNC_FUNCS\n#define HIP_HAS_SHFL_SYNC_FUNCS\n#ifndef __shfl_sync\n#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width)\n#endif\n#ifndef __shfl_xor_sync\n#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width)\n#endif\n#ifndef __shfl_up_sync\n#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width)\n#endif\n#endif\n' \ - | cat - /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh > /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \ - && mv /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \ + && bash -lc 'f=/opt/llama.cpp/ggml/src/ggml-cuda/mmid.cu; \ + grep -q "vendors/hip.h" "$f" || sed -i '\''1i #include "vendors/hip.h"'\'' "$f"' \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -62,9 +61,10 @@ RUN git clean -xdf \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm" \ - && cmake --build build --config Release -- -j$(nproc) \ + && cmake --build build --config Release -- -j"$(nproc)" \ && cmake --install build --config Release + # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From e36bd3e4ecae229357054aeb065abd4ce2f9c799 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 07:55:03 +0000 Subject: [PATCH 12/21] trying another fix for rocm-6.4.4-rocwmma --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index 54628ef..a29b034 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -49,7 +49,7 @@ RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.c RUN git clean -xdf \ && git pull \ && git submodule update --recursive \ - && bash -lc 'f=/opt/llama.cpp/ggml/src/ggml-cuda/mmid.cu; \ + && bash -lc 'f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh; \ grep -q "vendors/hip.h" "$f" || sed -i '\''1i #include "vendors/hip.h"'\'' "$f"' \ && cmake -S . -B build \ -DGGML_HIP=ON \ @@ -65,6 +65,7 @@ RUN git clean -xdf \ && cmake --install build --config Release + # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From be28cb2ad56d06f38d9a3d85e179ece54d12de79 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 08:28:03 +0000 Subject: [PATCH 13/21] Add HIP shuffle compatibility shim for gfx1151 builds --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index a29b034..ccc07f6 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -49,8 +49,23 @@ RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.c RUN git clean -xdf \ && git pull \ && git submodule update --recursive \ + && cat > /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h <<'EOF' +#ifndef HIP_SHFL_FIX_H +#define HIP_SHFL_FIX_H +#include "common.cuh" +#ifndef __shfl_sync +#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width) +#endif +#ifndef __shfl_up_sync +#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width) +#endif +#ifndef __shfl_xor_sync +#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width) +#endif +#endif +EOF && bash -lc 'f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh; \ - grep -q "vendors/hip.h" "$f" || sed -i '\''1i #include "vendors/hip.h"'\'' "$f"' \ + grep -q "hip_shfl_fix.h" "$f" || sed -i '\''1i #include "hip_shfl_fix.h"'\'' "$f"' \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -64,8 +79,6 @@ RUN git clean -xdf \ && cmake --build build --config Release -- -j"$(nproc)" \ && cmake --install build --config Release - - # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From ff0ef125ccf7d3cb0bf5551007bdf3b15ac2daa1 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 09:43:12 +0000 Subject: [PATCH 14/21] Add HIP shuffle macro shim to restore __shfl_sync support on gfx1151 --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 26 ++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index ccc07f6..1c3e61d 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -49,23 +49,26 @@ RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.c RUN git clean -xdf \ && git pull \ && git submodule update --recursive \ + && rm -f /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \ && cat > /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h <<'EOF' #ifndef HIP_SHFL_FIX_H #define HIP_SHFL_FIX_H -#include "common.cuh" -#ifndef __shfl_sync -#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width) -#endif -#ifndef __shfl_up_sync -#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width) -#endif -#ifndef __shfl_xor_sync -#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width) +// Keep vendor shims if present, add only what’s missing. +#ifdef __HIP_PLATFORM_AMD__ + #ifndef __shfl_sync + #define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width)) + #endif + #ifndef __shfl_up_sync + #define __shfl_up_sync(mask,var,delta,width) __shfl_up((var),(delta),(width)) + #endif + #ifndef __shfl_xor_sync + #define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor((var),(laneMask),(width)) + #endif #endif #endif EOF - && bash -lc 'f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh; \ - grep -q "hip_shfl_fix.h" "$f" || sed -i '\''1i #include "hip_shfl_fix.h"'\'' "$f"' \ + && f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh; \ + grep -q 'hip_shfl_fix.h' "$f" || sed -i '1i #include "hip_shfl_fix.h"' "$f" \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -79,6 +82,7 @@ EOF && cmake --build build --config Release -- -j"$(nproc)" \ && cmake --install build --config Release + # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From b6de7881ddea14d4f397872ac8842762a84a2888 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 09:53:55 +0000 Subject: [PATCH 15/21] fix hredoc synatx in rocm-6.4.4-rocwmma --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index 1c3e61d..c4e1240 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -49,11 +49,9 @@ RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.c RUN git clean -xdf \ && git pull \ && git submodule update --recursive \ - && rm -f /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \ && cat > /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h <<'EOF' #ifndef HIP_SHFL_FIX_H #define HIP_SHFL_FIX_H -// Keep vendor shims if present, add only what’s missing. #ifdef __HIP_PLATFORM_AMD__ #ifndef __shfl_sync #define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width)) @@ -67,8 +65,8 @@ RUN git clean -xdf \ #endif #endif EOF - && f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh; \ - grep -q 'hip_shfl_fix.h' "$f" || sed -i '1i #include "hip_shfl_fix.h"' "$f" \ + && f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \ + && grep -q 'hip_shfl_fix.h' "$f" || sed -i '1i #include "hip_shfl_fix.h"' "$f" \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ From 42bbc2301e0cfce48b34653590bf23a5f8524f3e Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 10:16:34 +0000 Subject: [PATCH 16/21] Force-include HIP shuffle shim to fix missing __shfl_sync on gfx1151 builds --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index c4e1240..fc43af4 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -46,7 +46,8 @@ COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp # build -RUN git clean -xdf \ +RUN set -euo pipefail \ + && git clean -xdf \ && git pull \ && git submodule update --recursive \ && cat > /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h <<'EOF' @@ -65,8 +66,8 @@ RUN git clean -xdf \ #endif #endif EOF - && f=/opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \ - && grep -q 'hip_shfl_fix.h' "$f" || sed -i '1i #include "hip_shfl_fix.h"' "$f" \ + # remove any old inline hack you had in mma.cuh (safe if absent) + && sed -i '/HIP_HAS_SHFL_SYNC_FUNCS/,+20d' /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh || true \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -76,7 +77,7 @@ EOF -DROCM_PATH=/opt/rocm \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ - -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm" \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ && cmake --build build --config Release -- -j"$(nproc)" \ && cmake --install build --config Release From 48ba7c43a19412d70e06fe79474f8b003228dc33 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 10:42:37 +0000 Subject: [PATCH 17/21] Add HIP shuffle compatibility shim header for gfx1151 --- ggml/src/ggml-cuda/hip_shfl_fix.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 ggml/src/ggml-cuda/hip_shfl_fix.h diff --git a/ggml/src/ggml-cuda/hip_shfl_fix.h b/ggml/src/ggml-cuda/hip_shfl_fix.h new file mode 100644 index 0000000..90bff1a --- /dev/null +++ b/ggml/src/ggml-cuda/hip_shfl_fix.h @@ -0,0 +1,14 @@ +#ifndef HIP_SHFL_FIX_H +#define HIP_SHFL_FIX_H +#ifdef __HIP_PLATFORM_AMD__ + #ifndef __shfl_sync + #define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width)) + #endif + #ifndef __shfl_up_sync + #define __shfl_up_sync(mask,var,delta,width) __shfl_up((var),(delta),(width)) + #endif + #ifndef __shfl_xor_sync + #define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor((var),(laneMask),(width)) + #endif +#endif +#endif From e9ed0bac22f545946af9a3e427a5bd8122bf427c Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 10:44:26 +0000 Subject: [PATCH 18/21] Copy local HIP shuffle shim into build image to restore __shfl_sync support on gfx1151 --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 29 +++++-------------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index fc43af4..21d3177 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -41,33 +41,15 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . +# overwrite upstream header with our local fixed version +COPY ggml/src/ggml-cuda/hip_shfl_fix.h /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h + # Apply # rocWMMA patch COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp -# build +# Build RUN set -euo pipefail \ - && git clean -xdf \ - && git pull \ - && git submodule update --recursive \ - && cat > /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h <<'EOF' -#ifndef HIP_SHFL_FIX_H -#define HIP_SHFL_FIX_H -#ifdef __HIP_PLATFORM_AMD__ - #ifndef __shfl_sync - #define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width)) - #endif - #ifndef __shfl_up_sync - #define __shfl_up_sync(mask,var,delta,width) __shfl_up((var),(delta),(width)) - #endif - #ifndef __shfl_xor_sync - #define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor((var),(laneMask),(width)) - #endif -#endif -#endif -EOF - # remove any old inline hack you had in mma.cuh (safe if absent) - && sed -i '/HIP_HAS_SHFL_SYNC_FUNCS/,+20d' /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh || true \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -77,11 +59,10 @@ EOF -DROCM_PATH=/opt/rocm \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ - -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ && cmake --build build --config Release -- -j"$(nproc)" \ && cmake --install build --config Release - # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From 0fc19e1849a3d2195344d02d0ad1b1579f94410e Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 11:12:07 +0000 Subject: [PATCH 19/21] moving folder to the right place --- {ggml => toolboxes/ggml}/src/ggml-cuda/hip_shfl_fix.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {ggml => toolboxes/ggml}/src/ggml-cuda/hip_shfl_fix.h (100%) diff --git a/ggml/src/ggml-cuda/hip_shfl_fix.h b/toolboxes/ggml/src/ggml-cuda/hip_shfl_fix.h similarity index 100% rename from ggml/src/ggml-cuda/hip_shfl_fix.h rename to toolboxes/ggml/src/ggml-cuda/hip_shfl_fix.h From a04405653482f9cafb29219826f488718801b573 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 11:54:52 +0000 Subject: [PATCH 20/21] Use absolute include path for HIP shuffle shim to fix CMake compiler detection --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index 21d3177..454c846 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -59,10 +59,11 @@ RUN set -euo pipefail \ -DROCM_PATH=/opt/rocm \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ - -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ - && cmake --build build --config Release -- -j"$(nproc)" \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ + && cmake --build build --config Release -- -j\"$(nproc)\" \ && cmake --install build --config Release + # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig From 9529c03e61cd23e94d21285a52930a14550e1999 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 12 Nov 2025 12:32:09 +0000 Subject: [PATCH 21/21] Fix Docker build parallelism flag by removing extra quoting around -j$(nproc) --- toolboxes/Dockerfile.rocm-6.4.4-rocwmma | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index 454c846..ba977f6 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -60,7 +60,7 @@ RUN set -euo pipefail \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ - && cmake --build build --config Release -- -j\"$(nproc)\" \ + && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release