Merge remote-tracking branch 'origin/main' into pr-20

This commit is contained in:
Donato Capitella
2025-11-12 13:19:56 +00:00
11 changed files with 522 additions and 12 deletions
+1 -1
View File
@@ -28,7 +28,7 @@ jobs:
IN='${{ inputs.backends }}'
if [[ "$IN" == "all" || -z "$IN" ]]; then
JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7rc","rocm-7rc-rocwmma","rocm-7.1-rocwmma","vulkan-amdvlk","vulkan-radv"]'
JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7.1","rocm-7.1-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]'
else
# Remove spaces and build JSON array from comma list
IN_CLEAN=$(echo "$IN" | tr -d '[:space:]')
+1 -1
View File
@@ -44,7 +44,7 @@ jobs:
run: |
IN='${{ github.event.inputs.backends }}'
if [[ "$IN" == "all" || -z "$IN" ]]; then
JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7rc","rocm-7rc-rocwmma","rocm-7.1-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]'
JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7.1","rocm-7.1-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]'
else
IN_CLEAN=$(echo "$IN" | tr -d '[:space:]')
JSON='["'${IN_CLEAN//,/\",\"}'"]'
+1 -1
View File
@@ -145,7 +145,7 @@ Once inside, the following commands show how to run local LLMs:
* `llama-cli --list-devices`
*Lists available GPU devices for Llama.cpp.*
* `llama-cli --no-mmap -ngl 999 -fa -m <model>`
* `llama-cli --no-mmap -ngl 999 -fa 1 -m <model>`
*Runs inference on the specified model, with all layers on GPU and flash attention enabled (replace `<model>` with your model path).*
## 2.3 Downloading GGUF Models from HuggingFace
+5
View File
@@ -9,8 +9,13 @@ TOOLBOXES["llama-vulkan-amdvlk"]="docker.io/kyuz0/amd-strix-halo-toolboxes:vulka
TOOLBOXES["llama-vulkan-radv"]="docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv --device /dev/dri --group-add video --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-6.4.4"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4 --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-6.4.4-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7.1"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1 --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7.1-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7rc-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7alpha"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7alpha-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
TOOLBOXES["llama-rocm-7alpha-rocwmma-improved"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma-improved --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
function usage() {
echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]"
+7 -8
View File
@@ -41,17 +41,15 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git .
# overwrite upstream header with our local fixed version
COPY ggml/src/ggml-cuda/hip_shfl_fix.h /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h
# Apply rocWMMA patch
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
# build
RUN git clean -xdf \
&& git pull \
&& git submodule update --recursive \
&& echo -e '#ifndef HIP_HAS_SHFL_SYNC_FUNCS\n#define HIP_HAS_SHFL_SYNC_FUNCS\n#ifndef __shfl_sync\n#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width)\n#endif\n#ifndef __shfl_xor_sync\n#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width)\n#endif\n#ifndef __shfl_up_sync\n#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width)\n#endif\n#endif\n' \
| cat - /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh > /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \
&& mv /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \
# Build
RUN set -euo pipefail \
&& cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
@@ -61,10 +59,11 @@ RUN git clean -xdf \
-DROCM_PATH=/opt/rocm \
-DHIP_PATH=/opt/rocm \
-DHIP_PLATFORM=amd \
-DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm" \
-DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \
&& cmake --build build --config Release -- -j$(nproc) \
&& cmake --install build --config Release
# libs
RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
&& ldconfig
+111
View File
@@ -0,0 +1,111 @@
# build stage
FROM registry.fedoraproject.org/fedora:rawhide AS builder
# rocm 7.1 repo
RUN <<'EOF'
tee /etc/yum.repos.d/rocm.repo <<REPO
[ROCm-7.1]
name=ROCm7.1
baseurl=https://repo.radeon.com/rocm/el9/7.1/main
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
REPO
EOF
# deps
# Installs the build toolchain (make/gcc/cmake/clang/ninja), the ROCm/HIP
# compiler and development packages, and a few utilities in a single layer.
# Weak dependencies and docs are turned off, and packages matching
# *sdk*, *samples*, *-doc*, *-docs* are excluded from the transaction.
# The dnf cache is cleaned in the same RUN so it is not stored in this layer.
# NOTE(review): on dnf5-based images the cache may live outside /var/cache/dnf,
# in which case the trailing rm is a no-op — confirm; `dnf clean all` still runs.
RUN dnf -y --nodocs --setopt=install_weak_deps=False \
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel ninja-build \
rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \
rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \
rocminfo radeontop \
git-core vim sudo rsync \
&& dnf clean all && rm -rf /var/cache/dnf/*
# rocm env
ENV ROCM_PATH=/opt/rocm \
HIP_PATH=/opt/rocm \
HIP_CLANG_PATH=/opt/rocm/llvm/bin \
HIP_DEVICE_LIB_PATH=/opt/rocm/amdgcn/bitcode \
PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH
# llama.cpp
# Clone upstream llama.cpp, including submodules, into /opt/llama.cpp.
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git .
# build
# Refreshes the checkout (clean, pull, submodule update), then configures,
# compiles and installs a Release build with the HIP backend for gfx1151.
# No install prefix is passed to CMake; the runtime stage copies /usr/local/
# from this stage, so the install is presumably expected to land there
# (CMake's default prefix) — TODO confirm.
# NOTE(review): verify that -DLLAMA_HIP_UMA and
# -DGGML_CUDA_ENABLE_UNIFIED_MEMORY are still recognised by the llama.cpp
# revision being built; unknown -D options are only warned about by CMake.
RUN git clean -xdf \
&& git pull \
&& git submodule update --recursive \
&& cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
-DCMAKE_BUILD_TYPE=Release \
-DLLAMA_HIP_UMA=ON \
-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
-DROCM_PATH=/opt/rocm \
-DHIP_PATH=/opt/rocm \
-DHIP_PLATFORM=amd \
-DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm" \
&& cmake --build build --config Release -- -j$(nproc) \
&& cmake --install build --config Release
# libs
RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
&& ldconfig
# helper
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
# runtime stage
FROM registry.fedoraproject.org/fedora-minimal:rawhide
# rocm 7.1 repo
RUN <<'EOF'
tee /etc/yum.repos.d/rocm.repo <<REPO
[ROCm-7.1]
name=ROCm7.1
baseurl=https://repo.radeon.com/rocm/el9/7.1/main
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
REPO
EOF
# runtime deps
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 \
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
install \
bash ca-certificates libatomic libstdc++ libgcc libgomp sudo \
hip-runtime-amd rocblas hipblas \
rocminfo radeontop \
&& microdnf clean all && rm -rf /var/cache/dnf/*
# copy
COPY --from=builder /usr/local/ /usr/local/
# ld
# Registers /usr/local/lib and /usr/local/lib64 with the dynamic linker and
# refreshes the linker cache.
# The libllama copy into /usr/lib64 is best-effort (the glob may match
# nothing), so only that one command is allowed to fail. The braces are
# required: `&&` and `||` have equal precedence and associate left to right,
# so an ungrouped `|| true` would also swallow a failure of the echo/ldconfig
# commands chained before it.
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
&& ldconfig \
&& { cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true; } \
&& ldconfig
# helper
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
# profile
# Writes /etc/profile.d/rocm.sh containing a single export of
# ROCBLAS_USE_HIPBLASLT=1, marks it executable, and appends a `source` line
# to /etc/bashrc so shells that read /etc/bashrc also load it.
RUN printf '%s\n' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh && chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
# shell
CMD ["/bin/bash"]
+124
View File
@@ -0,0 +1,124 @@
# build
FROM registry.fedoraproject.org/fedora:rawhide AS builder
RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
radeontop git vim patch curl ninja-build tar xz aria2c \
&& dnf clean all && rm -rf /var/cache/dnf/*
# Find and fetch the newest TheRock nightly tarball for ROCm
# ${ROCM_MAJOR_VER}.x built for ${GFX}; the result is /tmp/therock.tar.gz.
#
# The bucket listing is fetched on its own with `curl -f`, so network and
# HTTP errors abort the build with curl's own message instead of an empty
# listing. grep is allowed to match nothing so that the explicit empty-KEY
# check below can report a readable error rather than `set -e` exiting
# silently inside the pipeline.
# NOTE(review): a single list-type=2 request is not paginated here; if the
# bucket ever holds more keys for this prefix than one response returns, the
# newest tarball could be missed — confirm against the S3 listing limits.
WORKDIR /tmp
ARG ROCM_MAJOR_VER=7
ARG GFX=gfx1151
RUN set -euo pipefail; \
BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \
PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \
LISTING="$(curl -fsSL "${BASE}?list-type=2&prefix=${PREFIX}")"; \
KEY="$(printf '%s\n' "${LISTING}" \
| tr '<' '\n' \
| { grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" || true; } \
| sort -V | tail -n1)"; \
if [ -z "${KEY}" ]; then echo "No tarball found for prefix ${PREFIX}" >&2; exit 1; fi; \
echo "Latest tarball: ${KEY}"; \
aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz
RUN mkdir -p /opt/rocm-7.0 \
&& tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \
&& git clean -xdf \
&& git submodule update --recursive
RUN cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
-DCMAKE_BUILD_TYPE=Release \
-DLLAMA_HIP_UMA=ON \
&& cmake --build build --config Release -- -j$(nproc) \
&& cmake --install build --config Release
# keep bin; drop headers/docs/static libs; drop source tree
# Runs in the builder stage after `cmake --install`: deletes every *.a under
# /opt/rocm-7.0, removes the include/ and share/ trees of ROCm and its bundled
# LLVM, and removes the llama.cpp checkout (including its build directory).
# The runtime stage copies /opt/rocm-7.0 from this stage, so anything removed
# here is absent from the final image.
# NOTE(review): the runtime stage still sets HIP_INCLUDE_PATH and CPATH to
# /opt/rocm-7.0/include, which no longer exists after this step — confirm
# nothing at runtime relies on those headers.
RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \
&& rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \
/opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \
&& rm -rf /opt/llama.cpp
# runtime
FROM registry.fedoraproject.org/fedora-minimal:rawhide
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \
bash ca-certificates libatomic libstdc++ libgcc radeontop vim \
&& microdnf clean all && rm -rf /var/cache/dnf/*
COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0
COPY --from=builder /usr/local/ /usr/local/
COPY gguf-vram-estimator.py /usr/local/bin/
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
# make /usr/local libs visible without touching env
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
&& ldconfig
CMD ["/bin/bash"]
+126
View File
@@ -0,0 +1,126 @@
# build
FROM registry.fedoraproject.org/fedora:rawhide AS builder
RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
radeontop git vim patch curl ninja-build tar xz aria2c \
&& dnf clean all && rm -rf /var/cache/dnf/*
# Find and fetch the newest TheRock nightly tarball for ROCm
# ${ROCM_MAJOR_VER}.x built for ${GFX}; the result is /tmp/therock.tar.gz.
#
# The bucket listing is fetched on its own with `curl -f`, so network and
# HTTP errors abort the build with curl's own message instead of an empty
# listing. grep is allowed to match nothing so that the explicit empty-KEY
# check below can report a readable error rather than `set -e` exiting
# silently inside the pipeline.
# NOTE(review): a single list-type=2 request is not paginated here; if the
# bucket ever holds more keys for this prefix than one response returns, the
# newest tarball could be missed — confirm against the S3 listing limits.
WORKDIR /tmp
ARG ROCM_MAJOR_VER=7
ARG GFX=gfx1151
RUN set -euo pipefail; \
BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \
PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \
LISTING="$(curl -fsSL "${BASE}?list-type=2&prefix=${PREFIX}")"; \
KEY="$(printf '%s\n' "${LISTING}" \
| tr '<' '\n' \
| { grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" || true; } \
| sort -V | tail -n1)"; \
if [ -z "${KEY}" ]; then echo "No tarball found for prefix ${PREFIX}" >&2; exit 1; fi; \
echo "Latest tarball: ${KEY}"; \
aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz
RUN mkdir -p /opt/rocm-7.0 \
&& tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
WORKDIR /opt
COPY ./build-rocwmma.sh .
RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \
&& git clean -xdf \
&& git submodule update --recursive
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
RUN cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_RPC=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
&& cmake --build build --config Release -- -j$(nproc) \
&& cmake --install build --config Release
# runtime
FROM registry.fedoraproject.org/fedora-minimal:rawhide
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \
bash ca-certificates libatomic libstdc++ libgcc radeontop vim \
&& microdnf clean all && rm -rf /var/cache/dnf/*
COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0
COPY --from=builder /usr/local/ /usr/local/
COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/
COPY gguf-vram-estimator.py /usr/local/bin/
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
# make /usr/local libs visible without touching env
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
&& ldconfig
CMD ["/bin/bash"]
@@ -0,0 +1,126 @@
# build
FROM registry.fedoraproject.org/fedora:rawhide AS builder
RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
radeontop git vim patch curl ninja-build tar xz aria2c \
&& dnf clean all && rm -rf /var/cache/dnf/*
# Find and fetch the newest TheRock nightly tarball for ROCm
# ${ROCM_MAJOR_VER}.x built for ${GFX}; the result is /tmp/therock.tar.gz.
#
# The bucket listing is fetched on its own with `curl -f`, so network and
# HTTP errors abort the build with curl's own message instead of an empty
# listing. grep is allowed to match nothing so that the explicit empty-KEY
# check below can report a readable error rather than `set -e` exiting
# silently inside the pipeline.
# NOTE(review): a single list-type=2 request is not paginated here; if the
# bucket ever holds more keys for this prefix than one response returns, the
# newest tarball could be missed — confirm against the S3 listing limits.
WORKDIR /tmp
ARG ROCM_MAJOR_VER=7
ARG GFX=gfx1151
RUN set -euo pipefail; \
BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \
PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \
LISTING="$(curl -fsSL "${BASE}?list-type=2&prefix=${PREFIX}")"; \
KEY="$(printf '%s\n' "${LISTING}" \
| tr '<' '\n' \
| { grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" || true; } \
| sort -V | tail -n1)"; \
if [ -z "${KEY}" ]; then echo "No tarball found for prefix ${PREFIX}" >&2; exit 1; fi; \
echo "Latest tarball: ${KEY}"; \
aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz
RUN mkdir -p /opt/rocm-7.0 \
&& tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
WORKDIR /opt
COPY ./build-rocwmma.sh .
RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/hjc4869/llama.cpp.git . \
&& git clean -xdf \
&& git submodule update --recursive
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
RUN cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_RPC=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
&& cmake --build build --config Release -- -j$(nproc) \
&& cmake --install build --config Release
# runtime
FROM registry.fedoraproject.org/fedora-minimal:rawhide
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \
bash ca-certificates libatomic libstdc++ libgcc radeontop vim \
&& microdnf clean all && rm -rf /var/cache/dnf/*
COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0
COPY --from=builder /usr/local/ /usr/local/
COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/
COPY gguf-vram-estimator.py /usr/local/bin/
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
ENV ROCM_PATH=/opt/rocm-7.0 \
HIP_PLATFORM=amd \
HIP_PATH=/opt/rocm-7.0 \
HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
HIP_LIB_PATH=/opt/rocm-7.0/lib \
HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
CPATH=/opt/rocm-7.0/include \
PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
RUN printf '%s\n' \
'export ROCM_PATH=/opt/rocm-7.0' \
'export HIP_PLATFORM=amd' \
'export HIP_PATH=/opt/rocm-7.0' \
'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
'export CPATH="$HIP_INCLUDE_PATH"' \
'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
'export ROCBLAS_USE_HIPBLASLT=1' \
> /etc/profile.d/rocm.sh \
&& chmod +x /etc/profile.d/rocm.sh \
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
# make /usr/local libs visible without touching env
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
&& ldconfig
CMD ["/bin/bash"]
+6 -1
View File
@@ -59,7 +59,12 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \
&& git clean -xdf \
&& git submodule update --recursive
&& git submodule update --recursive \
&& git config user.email "builder@localhost" \
&& git config user.name "Container Builder" \
&& git fetch origin pull/15405/head:pr-15405 \
&& git merge --no-edit pr-15405
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
@@ -0,0 +1,14 @@
/*
 * Compatibility shim mapping the *_sync warp-shuffle names onto the
 * mask-less HIP shuffle calls.
 *
 * Each macro takes (mask, var, lane-argument, width) and forwards only
 * var, the lane argument and width to the corresponding __shfl* call;
 * the mask argument is discarded.
 *
 * Each definition is wrapped in #ifndef, which only detects an existing
 * *macro* of that name. NOTE(review): if the toolchain provides these names
 * as functions rather than macros, the #ifndef will not see them and these
 * macros will take over — confirm this is the intended behaviour for the
 * ROCm version in use.
 */
#ifndef HIP_SHFL_FIX_H
#define HIP_SHFL_FIX_H
/* The shim is only active when __HIP_PLATFORM_AMD__ is defined. */
#ifdef __HIP_PLATFORM_AMD__
#ifndef __shfl_sync
/* Read var from lane srcLane; mask is ignored. */
#define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width))
#endif
#ifndef __shfl_up_sync
/* Forward to __shfl_up with the given delta; mask is ignored. */
#define __shfl_up_sync(mask,var,delta,width) __shfl_up((var),(delta),(width))
#endif
#ifndef __shfl_xor_sync
/* Forward to __shfl_xor with the given laneMask; mask is ignored. */
#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor((var),(laneMask),(width))
#endif
#endif /* __HIP_PLATFORM_AMD__ */
#endif /* HIP_SHFL_FIX_H */