diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml
index 554169a..fd1e3b5 100644
--- a/.github/workflows/build_and_publish.yml
+++ b/.github/workflows/build_and_publish.yml
@@ -28,7 +28,7 @@ jobs:
 
           IN='${{ inputs.backends }}'
           if [[ "$IN" == "all" || -z "$IN" ]]; then
-            JSON='["rocm-6.4.2","rocm-6.4.2-rocwaam", "rocm-7beta","rocm-7rc","vulkan-amdvlk","vulkan-radv"]'
+            JSON='["rocm-6.4.2","rocm-6.4.2-rocwaam", "rocm-7beta","rocm-7rc","rocm-7rc-rocwaam","vulkan-amdvlk","vulkan-radv"]'
           else
             # Remove spaces and build JSON array from comma list
             IN_CLEAN=$(echo "$IN" | tr -d '[:space:]')
diff --git a/docs/building.md b/docs/building.md
index 6a181fb..8d9141b 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -30,7 +30,7 @@ podman build --no-cache -t llama-vulkan-radv -f Dockerfile.vulkan-radv .
 **Example: Build the ROCm 6.4.2 toolbox image**
 
 ```sh
-cd ../rocm-6.4.2
+cd toolboxes
 podman build --no-cache -t llama-rocm-6.4.2 -f Dockerfile.rocm-6.4.2 .
 ```
 
diff --git a/refresh-toolboxes.sh b/refresh-toolboxes.sh
index f50ac19..e6e163c 100644
--- a/refresh-toolboxes.sh
+++ b/refresh-toolboxes.sh
@@ -11,6 +11,7 @@ TOOLBOXES["llama-rocm-6.4.2"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4
 TOOLBOXES["llama-rocm-6.4.2-rocwaam"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.2-rocwaam --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
 TOOLBOXES["llama-rocm-7beta"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7beta --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
 TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
+TOOLBOXES["llama-rocm-7rc-rocwaam"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwaam --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
 
 function usage() {
   echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]"
diff --git a/toolboxes/Dockerfile.rocm-7rc-rocwaam b/toolboxes/Dockerfile.rocm-7rc-rocwaam
new file mode 100644
index 0000000..479852d
--- /dev/null
+++ b/toolboxes/Dockerfile.rocm-7rc-rocwaam
@@ -0,0 +1,93 @@
+FROM fedora:rawhide
+
+# 1) Install dependencies
+RUN dnf install -y \
+    make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
+    radeontop git vim patch curl ninja-build \
+    && dnf clean all
+
+# 2-3) Download the ROCm nightly tarball and extract it into /opt/rocm-7.0.
+#      Done in one layer so the tarball can be removed afterwards instead of
+#      staying in the image; `curl -f` makes an HTTP error fail the build
+#      here rather than handing an error page to tar.
+WORKDIR /tmp
+RUN curl -fL -o therock.tar.gz \
+    https://github.com/ROCm/TheRock/releases/download/nightly-tarball/therock-dist-linux-gfx1151-7.0.0rc20250714.tar.gz \
+    && mkdir -p /opt/rocm-7.0 \
+    && tar xvf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 \
+    && rm -f therock.tar.gz
+
+# 4) Bake in ROCm env + full system PATH
+ENV ROCM_PATH=/opt/rocm-7.0 \
+    HIP_PLATFORM=amd \
+    HIP_PATH=/opt/rocm-7.0 \
+    HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
+    HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
+    HIP_LIB_PATH=/opt/rocm-7.0/lib \
+    HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
+    PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
+    LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
+    CPATH=/opt/rocm-7.0/include \
+    PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig
+
+# 5) profile.d snippet for login & interactive shells
+RUN printf '%s\n' \
+    'export ROCM_PATH=/opt/rocm-7.0' \
+    'export HIP_PLATFORM=amd' \
+    'export HIP_PATH=/opt/rocm-7.0' \
+    'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
+    'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
+    'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
+    'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
+    'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
+    'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
+    'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
+    'export CPATH="$HIP_INCLUDE_PATH"' \
+    'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
+    'export ROCBLAS_USE_HIPBLASLT=1' \
+    > /etc/profile.d/rocm.sh \
+    && chmod +x /etc/profile.d/rocm.sh \
+    && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
+
+# Install rocWMMA
+WORKDIR /opt/
+COPY ./build-rocwaam.sh .
+RUN chmod +x build-rocwaam.sh
+RUN ./build-rocwaam.sh
+
+# 6) Clone llama.cpp
+WORKDIR /opt/llama.cpp
+RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \
+    && git clean -xdf \
+    && git submodule update --recursive
+
+# Apply the rocWMMA compatibility fix to the checkout
+COPY ./apply-rocwmma-fix.sh /opt
+RUN chmod +x /opt/apply-rocwmma-fix.sh
+RUN /opt/apply-rocwmma-fix.sh /opt/llama.cpp
+
+# 7) Copy hip-rocm7rc.patch into the checkout and apply it
+COPY hip-rocm7rc.patch /opt/llama.cpp/hip-rocm7rc.patch
+RUN patch -p1 < hip-rocm7rc.patch
+
+# 8) Configure, build & install llama.cpp with HIP
+RUN cmake -S . -B build \
+    -DGGML_HIP=ON \
+    -DAMDGPU_TARGETS=gfx1151 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DLLAMA_HIP_UMA=ON \
+    -DGGML_HIP_ROCWMMA_FATTN=ON \
+    && cmake --build build --config Release -- -j$(nproc) \
+    && cmake --install build --config Release
+
+# 9) Copy the .so from build/bin into /usr/lib64 so ldconfig can see it
+RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
+    && ldconfig
+
+# 10) Install helper script
+COPY gguf-vram-estimator.py /usr/local/bin/
+RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
+
+# 11) Default to interactive bash
+CMD ["/bin/bash"]
diff --git a/toolboxes/apply-rocwmma-fix.sh b/toolboxes/apply-rocwmma-fix.sh
new file mode 100644
index 0000000..ec176bc
--- /dev/null
+++ b/toolboxes/apply-rocwmma-fix.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+# apply-rocwmma-fix.sh - Apply rocWMMA compatibility fixes to llama.cpp
+# Usage: ./apply-rocwmma-fix.sh LLAMA_CPP_DIR
+#
+# Rewrites ggml/src/ggml-cuda/vendors/hip.h so the __shfl_sync and
+# __shfl_xor_sync macros are only defined when GGML_HIP_ROCWMMA_FATTN is not
+# defined, adds a GGML_HIP_WARP_MASK macro, and replaces hardcoded 0xFFFFFFFF
+# masks in the ggml-cuda .cu/.cuh files with it. Each modified file is first
+# copied to FILE.backup.
+
+set -euo pipefail
+
+LLAMA_DIR="${1:-}"
+
+if [[ -z "$LLAMA_DIR" ]]; then
+  echo "Usage: $0 LLAMA_CPP_DIR"
+  echo ""
+  echo "This script applies rocWMMA compatibility fixes to a llama.cpp checkout."
+  echo "The fixes resolve warp synchronization mask type conflicts between"
+  echo "ROCm headers and CUDA-style code when building with GGML_HIP_ROCWMMA_FATTN=ON."
+  echo ""
+  echo "Example:"
+  echo " $0 ./llama.cpp"
+  echo " $0 /path/to/your/llama.cpp"
+  exit 1
+fi
+
+if [[ ! -d "$LLAMA_DIR" ]]; then
+  echo "Error: Directory '$LLAMA_DIR' does not exist" >&2
+  exit 1
+fi
+
+if [[ ! -f "$LLAMA_DIR/CMakeLists.txt" ]] || ! grep -q "llama" "$LLAMA_DIR/CMakeLists.txt" 2>/dev/null; then
+  echo "Error: '$LLAMA_DIR' does not appear to be a llama.cpp directory" >&2
+  echo "Expected to find CMakeLists.txt with 'llama' references" >&2
+  exit 1
+fi
+
+VENDOR_HIP_FILE="$LLAMA_DIR/ggml/src/ggml-cuda/vendors/hip.h"
+
+if [[ ! -f "$VENDOR_HIP_FILE" ]]; then
+  echo "Error: HIP vendor header not found at: $VENDOR_HIP_FILE" >&2
+  echo "This script requires a llama.cpp version with HIP support" >&2
+  exit 1
+fi
+
+echo "Applying rocWMMA compatibility fixes to: $LLAMA_DIR"
+echo ""
+
+# Check if fixes are already applied
+if grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE" 2>/dev/null; then
+  echo "rocWMMA fixes appear to already be applied (found GGML_HIP_WARP_MASK)"
+  echo "To reapply, please first revert changes and run this script again"
+  exit 0
+fi
+
+echo "Step 1: Modifying HIP vendor header..."
+
+# Find the line with the __shfl_sync definition. `|| true` keeps a no-match
+# (grep exits 1, which `set -e -o pipefail` would turn into a silent abort)
+# from skipping the explicit error message below.
+SHFL_LINE=$(grep -n -m1 "^#define __shfl_sync" "$VENDOR_HIP_FILE" | cut -d: -f1) || true
+
+if [[ -z "$SHFL_LINE" ]]; then
+  echo "Error: Could not find __shfl_sync macro definition in $VENDOR_HIP_FILE" >&2
+  echo "This script may need updates for this version of llama.cpp" >&2
+  exit 1
+fi
+
+# The rewrite below drops two lines starting at SHFL_LINE, so the line right
+# after __shfl_sync must be the __shfl_xor_sync definition; otherwise unrelated
+# code would be deleted from the header.
+XOR_LINE_TEXT=$(sed -n "$((SHFL_LINE + 1))p" "$VENDOR_HIP_FILE")
+
+if [[ "$XOR_LINE_TEXT" != "#define __shfl_xor_sync"* ]]; then
+  echo "Error: Expected __shfl_xor_sync macro definition right after __shfl_sync in $VENDOR_HIP_FILE" >&2
+  echo "This script may need updates for this version of llama.cpp" >&2
+  exit 1
+fi
+
+# Backup the original file (only once all pre-checks have passed)
+cp "$VENDOR_HIP_FILE" "$VENDOR_HIP_FILE.backup"
+
+# Create a temporary file with the fix
+{
+  # Print lines before the __shfl_sync definition
+  head -n $((SHFL_LINE - 1)) "$VENDOR_HIP_FILE"
+
+  # Add our conditional compilation block
+  cat << 'EOF'
+#ifdef GGML_HIP_ROCWMMA_FATTN
+// ROCm requires 64-bit masks for __shfl_*_sync functions
+#define GGML_HIP_WARP_MASK 0xFFFFFFFFFFFFFFFFULL
+#else
+#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width)
+#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
+#define GGML_HIP_WARP_MASK 0xFFFFFFFF
+#endif
+EOF
+
+  # Skip the original __shfl_sync and __shfl_xor_sync lines and print the rest
+  tail -n +$((SHFL_LINE + 2)) "$VENDOR_HIP_FILE"
+
+} > "$VENDOR_HIP_FILE.tmp"
+
+mv "$VENDOR_HIP_FILE.tmp" "$VENDOR_HIP_FILE"
+
+echo " ✓ Added conditional GGML_HIP_WARP_MASK macro to vendor header"
+
+echo ""
+echo "Step 2: Replacing hardcoded warp masks in CUDA files..."
+
+# Find all .cu and .cuh files in the ggml/src/ggml-cuda directory.
+# NUL-delimited so paths containing whitespace stay intact.
+mapfile -d '' -t CUDA_FILES < <(
+  find "$LLAMA_DIR/ggml/src/ggml-cuda" \( -name "*.cu" -o -name "*.cuh" \) -print0 2>/dev/null | sort -z
+)
+
+if [[ ${#CUDA_FILES[@]} -eq 0 ]]; then
+  echo "Warning: No CUDA files found in $LLAMA_DIR/ggml/src/ggml-cuda"
+  echo "This may be expected for some llama.cpp versions"
+else
+  MODIFIED_COUNT=0
+
+  for file in "${CUDA_FILES[@]}"; do
+    # Check if file contains the hardcoded masks
+    if grep -q "0xFFFFFFFF\|0xffffffff" "$file" 2>/dev/null; then
+      # Create backup
+      cp "$file" "$file.backup"
+
+      # Replace both uppercase and lowercase versions
+      sed -i 's/0xFFFFFFFF/GGML_HIP_WARP_MASK/g; s/0xffffffff/GGML_HIP_WARP_MASK/g' "$file"
+
+      MODIFIED_COUNT=$((MODIFIED_COUNT + 1))
+      echo " ✓ Modified: $(basename "$file")"
+    fi
+  done
+
+  echo " ✓ Modified $MODIFIED_COUNT CUDA files"
+fi
+
+echo ""
+echo "Step 3: Verification..."
+
+# Verify the vendor header was modified correctly
+if grep -q "GGML_HIP_ROCWMMA_FATTN" "$VENDOR_HIP_FILE" && grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE"; then
+  echo " ✓ Vendor header modification verified"
+else
+  echo " ✗ Vendor header modification failed" >&2
+  # Restore backup
+  mv "$VENDOR_HIP_FILE.backup" "$VENDOR_HIP_FILE"
+  echo " ✓ Restored original vendor header" >&2
+  exit 1
+fi
+
+echo ""
+echo "🎉 rocWMMA compatibility fixes applied successfully!"
+echo ""
+echo "What was changed:"
+echo " • Added conditional GGML_HIP_WARP_MASK macro to ggml/src/ggml-cuda/vendors/hip.h"
+echo " • Replaced hardcoded 0xFFFFFFFF/0xffffffff with GGML_HIP_WARP_MASK in CUDA files"
+echo ""
+echo "Behavior:"
+echo " • For regular HIP builds: GGML_HIP_WARP_MASK = 0xFFFFFFFF (no change)"
+echo " • For rocWMMA builds: GGML_HIP_WARP_MASK = 0xFFFFFFFFFFFFFFFFULL (64-bit masks)"
+echo ""
+echo "To build with rocWMMA support, use:"
+echo " cmake -B build -S '$LLAMA_DIR' -DGGML_HIP=ON -DAMDGPU_TARGETS=\"gfx1151\" -DGGML_HIP_ROCWMMA_FATTN=ON"
+echo ""
+echo "Backup files were created with .backup extension in case you need to revert."
+echo ""
+echo "Done! Your llama.cpp checkout now supports rocWMMA builds."
diff --git a/toolboxes/build-rocwaam.sh b/toolboxes/build-rocwaam.sh
new file mode 100644
index 0000000..c60fc0c
--- /dev/null
+++ b/toolboxes/build-rocwaam.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# build-rocwaam.sh - Clone, build and install rocWMMA into $ROCM_PATH.
+# Usage: ./build-rocwaam.sh
+# Requires ROCM_PATH to point at a ROCm install providing llvm/bin/amdclang.
+
+set -euo pipefail
+
+: "${ROCM_PATH:?ROCM_PATH must be set to the ROCm installation prefix}"
+
+# Reuse an existing checkout so the script can be re-run.
+if [[ ! -d rocWMMA ]]; then
+  git clone https://github.com/ROCm/rocWMMA
+fi
+cd rocWMMA
+
+# NOTE(review): this spot used to say "Change FP8 check from FAIL to STATUS"
+# but no command implements it; confirm whether that patch is still needed.
+
+rm -rf build
+mkdir build
+CC="$ROCM_PATH/llvm/bin/amdclang" CXX="$ROCM_PATH/llvm/bin/amdclang++" \
+  cmake -B build . -G Ninja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX="$ROCM_PATH" \
+    -DROCWMMA_BUILD_TESTS=OFF \
+    -DROCWMMA_BUILD_SAMPLES=OFF \
+    -DGPU_TARGETS="gfx1151"
+
+cmake --build build -j"$(nproc)"
+
+# Installing into $ROCM_PATH usually needs root. The Dockerfile that calls this
+# script sets no USER, so only go through sudo when run by a non-root user.
+if [[ "$(id -u)" -eq 0 ]]; then
+  cmake --install build
+else
+  sudo cmake --install build
+fi