diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index fde3015..0d6b0a8 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -28,7 +28,7 @@ jobs: IN='${{ inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7rc","rocm-7rc-rocwmma","rocm-7.1-rocwmma","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7.1","rocm-7.1-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' else # Remove spaces and build JSON array from comma list IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') diff --git a/.github/workflows/prune-old-toolboxes.yml b/.github/workflows/prune-old-toolboxes.yml index 775b45d..f7c79fe 100644 --- a/.github/workflows/prune-old-toolboxes.yml +++ b/.github/workflows/prune-old-toolboxes.yml @@ -44,7 +44,7 @@ jobs: run: | IN='${{ github.event.inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7beta","rocm-7rc","rocm-7rc-rocwmma","rocm-7.1-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4-rocwmma","rocm-7.1","rocm-7.1-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' else IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') JSON='["'${IN_CLEAN//,/\",\"}'"]' diff --git a/README.md b/README.md index 97769b1..b5ae284 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ Once inside, the following commands show how to run local LLMs: * `llama-cli --list-devices` *Lists available GPU devices for Llama.cpp.* -* `llama-cli --no-mmap -ngl 999 -fa -m ` +* `llama-cli --no-mmap -ngl 999 -fa 1 -m ` *Runs inference on the specified model, with all layers on GPU and flash attention enabled (replace \*\* with your model path).* ## 2.3 Downloading GGUF Models from HuggingFace diff --git a/refresh-toolboxes.sh b/refresh-toolboxes.sh index 9a4dbdd..47eec80 100755 --- a/refresh-toolboxes.sh +++ b/refresh-toolboxes.sh @@ -9,8 +9,13 @@ TOOLBOXES["llama-vulkan-amdvlk"]="docker.io/kyuz0/amd-strix-halo-toolboxes:vulka TOOLBOXES["llama-vulkan-radv"]="docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv --device /dev/dri --group-add video --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-6.4.4"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4 --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-6.4.4-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.4-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7.1"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1 --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7.1-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" TOOLBOXES["llama-rocm-7rc-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" +TOOLBOXES["llama-rocm-7alpha-rocwmma-improved"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7alpha-rocwmma-improved --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined" function usage() { echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]" diff --git a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma index cb44de3..ba977f6 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4-rocwmma +++ b/toolboxes/Dockerfile.rocm-6.4.4-rocwmma @@ -41,17 +41,15 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . +# overwrite upstream header with our local fixed version +COPY ggml/src/ggml-cuda/hip_shfl_fix.h /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h + # Apply # rocWMMA patch COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp -# build -RUN git clean -xdf \ - && git pull \ - && git submodule update --recursive \ - && echo -e '#ifndef HIP_HAS_SHFL_SYNC_FUNCS\n#define HIP_HAS_SHFL_SYNC_FUNCS\n#ifndef __shfl_sync\n#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width)\n#endif\n#ifndef __shfl_xor_sync\n#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width)\n#endif\n#ifndef __shfl_up_sync\n#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width)\n#endif\n#endif\n' \ - | cat - /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh > /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \ - && mv /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \ +# Build +RUN set -euo pipefail \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -61,10 +59,11 @@ RUN git clean -xdf \ -DROCM_PATH=/opt/rocm \ -DHIP_PATH=/opt/rocm \ -DHIP_PLATFORM=amd \ - -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm" \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -include /opt/llama.cpp/ggml/src/ggml-cuda/hip_shfl_fix.h -Wno-macro-redefined" \ && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release + # libs RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig diff --git a/toolboxes/Dockerfile.rocm-7.1 b/toolboxes/Dockerfile.rocm-7.1 new file mode 100644 index 0000000..63c14a9 --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7.1 @@ -0,0 +1,111 @@ +# build stage +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +# rocm 7.1 repo +RUN <<'EOF' +tee /etc/yum.repos.d/rocm.repo < /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig \ + && cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \ + && ldconfig + +# helper +COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +# profile +RUN printf '%s\n' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# shell +CMD ["/bin/bash"] + diff --git a/toolboxes/Dockerfile.rocm-7alpha b/toolboxes/Dockerfile.rocm-7alpha new file mode 100644 index 0000000..b6bd81e --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha @@ -0,0 +1,124 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_HIP_UMA=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# keep bin; drop headers/docs/static libs; drop source tree +RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ + && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ + /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share \ + && rm -rf /opt/llama.cpp + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma b/toolboxes/Dockerfile.rocm-7alpha-rocwmma new file mode 100644 index 0000000..3f3a795 --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma @@ -0,0 +1,126 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt +COPY ./build-rocwmma.sh . +RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive +COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh +RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_RPC=ON \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ +COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] diff --git a/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved new file mode 100644 index 0000000..b34dbf2 --- /dev/null +++ b/toolboxes/Dockerfile.rocm-7alpha-rocwmma-improved @@ -0,0 +1,126 @@ +# build +FROM registry.fedoraproject.org/fedora:rawhide AS builder + +RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ + make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ + radeontop git vim patch curl ninja-build tar xz aria2c \ + && dnf clean all && rm -rf /var/cache/dnf/* + +# find & fetch the latest Linux 7.x.x tarball (gfx1151) +WORKDIR /tmp +ARG ROCM_MAJOR_VER=7 +ARG GFX=gfx1151 +RUN set -euo pipefail; \ + BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ + PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ + KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ + | tr '<' '\n' \ + | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ + | sort -V | tail -n1)"; \ + echo "Latest tarball: ${KEY}"; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz +RUN mkdir -p /opt/rocm-7.0 \ + && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +WORKDIR /opt +COPY ./build-rocwmma.sh . +RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh + +WORKDIR /opt/llama.cpp +RUN git clone --recursive https://github.com/hjc4869/llama.cpp.git . \ + && git clean -xdf \ + && git submodule update --recursive +COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh +RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp + +RUN cmake -S . -B build \ + -DGGML_HIP=ON \ + -DAMDGPU_TARGETS=gfx1151 \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_RPC=ON \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + && cmake --build build --config Release -- -j$(nproc) \ + && cmake --install build --config Release + +# runtime +FROM registry.fedoraproject.org/fedora-minimal:rawhide + +RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim \ + && microdnf clean all && rm -rf /var/cache/dnf/* + +COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 +COPY --from=builder /usr/local/ /usr/local/ +COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ + +COPY gguf-vram-estimator.py /usr/local/bin/ +RUN chmod +x /usr/local/bin/gguf-vram-estimator.py + +ENV ROCM_PATH=/opt/rocm-7.0 \ + HIP_PLATFORM=amd \ + HIP_PATH=/opt/rocm-7.0 \ + HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \ + HIP_INCLUDE_PATH=/opt/rocm-7.0/include \ + HIP_LIB_PATH=/opt/rocm-7.0/lib \ + HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \ + PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ + LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ + LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ + CPATH=/opt/rocm-7.0/include \ + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + +RUN printf '%s\n' \ + 'export ROCM_PATH=/opt/rocm-7.0' \ + 'export HIP_PLATFORM=amd' \ + 'export HIP_PATH=/opt/rocm-7.0' \ + 'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \ + 'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \ + 'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \ + 'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \ + 'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \ + 'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \ + 'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \ + 'export CPATH="$HIP_INCLUDE_PATH"' \ + 'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \ + 'export ROCBLAS_USE_HIPBLASLT=1' \ + > /etc/profile.d/rocm.sh \ + && chmod +x /etc/profile.d/rocm.sh \ + && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc + +# make /usr/local libs visible without touching env +RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ + && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig + +CMD ["/bin/bash"] diff --git a/toolboxes/Dockerfile.rocm-7rc-rocwmma b/toolboxes/Dockerfile.rocm-7rc-rocwmma index 7b6cb8b..15b91fd 100644 --- a/toolboxes/Dockerfile.rocm-7rc-rocwmma +++ b/toolboxes/Dockerfile.rocm-7rc-rocwmma @@ -59,7 +59,12 @@ RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh WORKDIR /opt/llama.cpp RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \ && git clean -xdf \ - && git submodule update --recursive + && git submodule update --recursive \ + && git config user.email "builder@localhost" \ + && git config user.name "Container Builder" \ + && git fetch origin pull/15405/head:pr-15405 \ + && git merge --no-edit pr-15405 + COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp diff --git a/toolboxes/ggml/src/ggml-cuda/hip_shfl_fix.h b/toolboxes/ggml/src/ggml-cuda/hip_shfl_fix.h new file mode 100644 index 0000000..90bff1a --- /dev/null +++ b/toolboxes/ggml/src/ggml-cuda/hip_shfl_fix.h @@ -0,0 +1,14 @@ +#ifndef HIP_SHFL_FIX_H +#define HIP_SHFL_FIX_H +#ifdef __HIP_PLATFORM_AMD__ + #ifndef __shfl_sync + #define __shfl_sync(mask,var,srcLane,width) __shfl((var),(srcLane),(width)) + #endif + #ifndef __shfl_up_sync + #define __shfl_up_sync(mask,var,delta,width) __shfl_up((var),(delta),(width)) + #endif + #ifndef __shfl_xor_sync + #define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor((var),(laneMask),(width)) + #endif +#endif +#endif