Adding rocm7-rocwaam toolbox
This commit is contained in:
@@ -28,7 +28,7 @@ jobs:
|
|||||||
IN='${{ inputs.backends }}'
|
IN='${{ inputs.backends }}'
|
||||||
|
|
||||||
if [[ "$IN" == "all" || -z "$IN" ]]; then
|
if [[ "$IN" == "all" || -z "$IN" ]]; then
|
||||||
JSON='["rocm-6.4.2","rocm-6.4.2-rocwaam", "rocm-7beta","rocm-7rc","vulkan-amdvlk","vulkan-radv"]'
|
JSON='["rocm-6.4.2","rocm-6.4.2-rocwaam", "rocm-7beta","rocm-7rc","rocm-7rc-rocwaam","vulkan-amdvlk","vulkan-radv"]'
|
||||||
else
|
else
|
||||||
# Remove spaces and build JSON array from comma list
|
# Remove spaces and build JSON array from comma list
|
||||||
IN_CLEAN=$(echo "$IN" | tr -d '[:space:]')
|
IN_CLEAN=$(echo "$IN" | tr -d '[:space:]')
|
||||||
|
|||||||
+1
-1
@@ -30,7 +30,7 @@ podman build --no-cache -t llama-vulkan-radv -f Dockerfile.vulkan-radv .
|
|||||||
**Example: Build the ROCm 6.4.2 toolbox image**
|
**Example: Build the ROCm 6.4.2 toolbox image**
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cd ../rocm-6.4.2
|
cd toolboxes
|
||||||
podman build --no-cache -t llama-rocm-6.4.2 -f Dockerfile.rocm-6.4.2 .
|
podman build --no-cache -t llama-rocm-6.4.2 -f Dockerfile.rocm-6.4.2 .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ TOOLBOXES["llama-rocm-6.4.2"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4
|
|||||||
TOOLBOXES["llama-rocm-6.4.2-rocwaam"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.2-rocwaam --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
TOOLBOXES["llama-rocm-6.4.2-rocwaam"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-6.4.2-rocwaam --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
||||||
TOOLBOXES["llama-rocm-7beta"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7beta --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
TOOLBOXES["llama-rocm-7beta"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7beta --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
||||||
TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
TOOLBOXES["llama-rocm-7rc"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
||||||
|
TOOLBOXES["llama-rocm-7rc-rocwaam"]="docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwaam --device /dev/dri --device /dev/kfd --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined"
|
||||||
|
|
||||||
function usage() {
|
function usage() {
|
||||||
echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]"
|
echo "Usage: $0 [all|toolbox-name1 toolbox-name2 ...]"
|
||||||
|
|||||||
@@ -0,0 +1,91 @@
|
|||||||
|
# Toolbox image: llama.cpp built with HIP and rocWMMA flash attention for gfx1151,
# on top of a ROCm 7.0 release-candidate tarball unpacked into /opt/rocm-7.0.
# NOTE(review): fedora:rawhide is a moving tag, so rebuilds are not reproducible;
# consider pinning a digest once a known-good base has been identified.
FROM fedora:rawhide

# 1) Install dependencies (sorted, one per line, so future diffs stay readable)
RUN dnf install -y \
        clang \
        clang-devel \
        cmake \
        compiler-rt \
        curl \
        gcc \
        git \
        libcurl-devel \
        lld \
        make \
        ninja-build \
        patch \
        radeontop \
        vim \
    && dnf clean all

# 2) Download the ROCm nightly tarball and 3) extract it into /opt/rocm-7.0.
#    Download, extraction and cleanup share one layer so the tarball is not kept in
#    the image; -f makes curl fail on an HTTP error instead of saving the error body.
WORKDIR /tmp
RUN curl -fL -o therock.tar.gz \
        https://github.com/ROCm/TheRock/releases/download/nightly-tarball/therock-dist-linux-gfx1151-7.0.0rc20250714.tar.gz \
    && mkdir -p /opt/rocm-7.0 \
    && tar -xf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 \
    && rm -f therock.tar.gz

# 4) Bake in ROCm env + full system PATH
ENV ROCM_PATH=/opt/rocm-7.0 \
    HIP_PLATFORM=amd \
    HIP_PATH=/opt/rocm-7.0 \
    HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin \
    HIP_INCLUDE_PATH=/opt/rocm-7.0/include \
    HIP_LIB_PATH=/opt/rocm-7.0/lib \
    HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode \
    PATH=/opt/rocm-7.0/bin:/opt/rocm-7.0/llvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
    LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \
    LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \
    CPATH=/opt/rocm-7.0/include \
    PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig

# 5) profile.d snippet for login & interactive shells
#    NOTE(review): ROCBLAS_USE_HIPBLASLT is exported here only, not in the ENV block
#    above, so processes that do not source this file will not see it - confirm intent.
RUN printf '%s\n' \
        'export ROCM_PATH=/opt/rocm-7.0' \
        'export HIP_PLATFORM=amd' \
        'export HIP_PATH=/opt/rocm-7.0' \
        'export HIP_CLANG_PATH=/opt/rocm-7.0/llvm/bin' \
        'export HIP_INCLUDE_PATH=/opt/rocm-7.0/include' \
        'export HIP_LIB_PATH=/opt/rocm-7.0/lib' \
        'export HIP_DEVICE_LIB_PATH=/opt/rocm-7.0/lib/llvm/amdgcn/bitcode' \
        'export PATH="$ROCM_PATH/bin:$HIP_CLANG_PATH:$PATH"' \
        'export LD_LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib"' \
        'export LIBRARY_PATH="$HIP_LIB_PATH:$ROCM_PATH/lib:$ROCM_PATH/lib64"' \
        'export CPATH="$HIP_INCLUDE_PATH"' \
        'export PKG_CONFIG_PATH="$ROCM_PATH/lib/pkgconfig"' \
        'export ROCBLAS_USE_HIPBLASLT=1' \
        > /etc/profile.d/rocm.sh \
    && chmod +x /etc/profile.d/rocm.sh \
    && echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc

# Build and install rocWMMA via the helper script (it clones into /opt/rocWMMA)
WORKDIR /opt/
COPY ./build-rocwaam.sh .
RUN chmod +x build-rocwaam.sh \
    && ./build-rocwaam.sh

# 6) Clone llama.cpp
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . \
    && git clean -xdf \
    && git submodule update --recursive

# Apply the rocWMMA compatibility patch script to the llama.cpp checkout
COPY ./apply-rocwmma-fix.sh /opt/
RUN chmod +x /opt/apply-rocwmma-fix.sh \
    && /opt/apply-rocwmma-fix.sh /opt/llama.cpp

# 7) Copy the ROCm 7 RC patch into the checkout and apply it
COPY hip-rocm7rc.patch /opt/llama.cpp/hip-rocm7rc.patch
RUN patch -p1 < hip-rocm7rc.patch

# 8) Configure, build & install llama.cpp with HIP
#    NOTE(review): LLAMA_HIP_UMA may no longer be recognised by current llama.cpp
#    CMake files - confirm it still has an effect.
RUN cmake -S . -B build \
        -DGGML_HIP=ON \
        -DAMDGPU_TARGETS=gfx1151 \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLAMA_HIP_UMA=ON \
        -DGGML_HIP_ROCWMMA_FATTN=ON \
    && cmake --build build --config Release -- -j"$(nproc)" \
    && cmake --install build --config Release

# 9) Copy the .so from build/bin into /usr/lib64 so ldconfig can see it
RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
    && ldconfig

# 10) Install helper script
COPY gguf-vram-estimator.py /usr/local/bin/
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py

# 11) Default to interactive bash
CMD ["/bin/bash"]
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
#!/bin/bash

# apply-rocwmma-fix.sh - Apply rocWMMA compatibility fixes to llama.cpp
# Usage: ./apply-rocwmma-fix.sh <path-to-llama.cpp-directory>
#
# Steps performed:
#   1. In ggml/src/ggml-cuda/vendors/hip.h, replace the two adjacent
#      __shfl_sync / __shfl_xor_sync macro definitions with a block that is
#      conditional on GGML_HIP_ROCWMMA_FATTN and defines GGML_HIP_WARP_MASK.
#   2. In every *.cu / *.cuh file under ggml/src/ggml-cuda, replace the literals
#      0xFFFFFFFF / 0xffffffff with GGML_HIP_WARP_MASK.
#   3. Verify the header edit; on failure restore every file from its backup.
#
# Each modified file gets a sibling "<file>.backup" copy.
# Exit status: 0 on success or when the fix is already present, 1 on any error.

set -euo pipefail

LLAMA_DIR="${1:-}"

if [[ -z "$LLAMA_DIR" ]]; then
    echo "Usage: $0 <path-to-llama.cpp-directory>"
    echo ""
    echo "This script applies rocWMMA compatibility fixes to a llama.cpp checkout."
    echo "The fixes resolve warp synchronization mask type conflicts between"
    echo "ROCm headers and CUDA-style code when building with GGML_HIP_ROCWMMA_FATTN=ON."
    echo ""
    echo "Example:"
    echo " $0 ./llama.cpp"
    echo " $0 /path/to/your/llama.cpp"
    exit 1
fi

if [[ ! -d "$LLAMA_DIR" ]]; then
    echo "Error: Directory '$LLAMA_DIR' does not exist"
    exit 1
fi

if [[ ! -f "$LLAMA_DIR/CMakeLists.txt" ]] || ! grep -q "llama" "$LLAMA_DIR/CMakeLists.txt" 2>/dev/null; then
    echo "Error: '$LLAMA_DIR' does not appear to be a llama.cpp directory"
    echo "Expected to find CMakeLists.txt with 'llama' references"
    exit 1
fi

VENDOR_HIP_FILE="$LLAMA_DIR/ggml/src/ggml-cuda/vendors/hip.h"

if [[ ! -f "$VENDOR_HIP_FILE" ]]; then
    echo "Error: HIP vendor header not found at: $VENDOR_HIP_FILE"
    echo "This script requires a llama.cpp version with HIP support"
    exit 1
fi

echo "Applying rocWMMA compatibility fixes to: $LLAMA_DIR"
echo ""

# Check if fixes are already applied
if grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE" 2>/dev/null; then
    echo "rocWMMA fixes appear to already be applied (found GGML_HIP_WARP_MASK)"
    echo "To reapply, please first revert changes and run this script again"
    exit 0
fi

echo "Step 1: Modifying HIP vendor header..."

# Find the line with the __shfl_sync definition.
# "|| true" keeps errexit/pipefail from aborting on "no match", so the explicit
# error message below is actually reachable.
SHFL_LINE=$(grep -n "^#define __shfl_sync" "$VENDOR_HIP_FILE" | head -1 | cut -d: -f1 || true)

if [[ -z "$SHFL_LINE" ]]; then
    echo "Error: Could not find __shfl_sync macro definition in $VENDOR_HIP_FILE"
    echo "This script may need updates for this version of llama.cpp"
    exit 1
fi

# The rewrite below drops exactly two lines (SHFL_LINE and the one after it).
# Refuse to continue unless the second one really is the __shfl_xor_sync macro,
# otherwise an unrelated line would be deleted silently.
XOR_LINE=$((SHFL_LINE + 1))
if ! sed -n "${XOR_LINE}p" "$VENDOR_HIP_FILE" | grep -q "^#define __shfl_xor_sync"; then
    echo "Error: Expected __shfl_xor_sync macro definition on line $XOR_LINE of $VENDOR_HIP_FILE"
    echo "This script may need updates for this version of llama.cpp"
    exit 1
fi

# Backup the original file (only once all preconditions have passed)
cp "$VENDOR_HIP_FILE" "$VENDOR_HIP_FILE.backup"

# Create a temporary file with the fix
{
    # Print lines before the __shfl_sync definition
    head -n $((SHFL_LINE - 1)) "$VENDOR_HIP_FILE"

    # Add our conditional compilation block
    cat << 'EOF'
#ifdef GGML_HIP_ROCWMMA_FATTN
// ROCm requires 64-bit masks for __shfl_*_sync functions
#define GGML_HIP_WARP_MASK 0xFFFFFFFFFFFFFFFFULL
#else
#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width)
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
#define GGML_HIP_WARP_MASK 0xFFFFFFFF
#endif
EOF

    # Skip the original __shfl_sync and __shfl_xor_sync lines and print the rest
    tail -n +$((SHFL_LINE + 2)) "$VENDOR_HIP_FILE"
} > "$VENDOR_HIP_FILE.tmp"

mv "$VENDOR_HIP_FILE.tmp" "$VENDOR_HIP_FILE"

echo " ✓ Added conditional GGML_HIP_WARP_MASK macro to vendor header"

echo ""
echo "Step 2: Replacing hardcoded warp masks in CUDA files..."

# Find all .cu and .cuh files in the ggml/src/ggml-cuda directory.
# mapfile reads one path per line, so paths containing spaces stay intact.
mapfile -t CUDA_FILES < <(find "$LLAMA_DIR/ggml/src/ggml-cuda" \( -name "*.cu" -o -name "*.cuh" \) 2>/dev/null | sort)

# Match the 32-bit all-ones literal only as a complete token, so longer literals
# such as 0xFFFFFFFFFFFFFFFF or suffixed ones such as 0xFFFFFFFFu are left alone.
MASK_PATTERN='\b0x(FFFFFFFF|ffffffff)\b'

# Files rewritten in this run; used to roll back if verification fails.
MODIFIED_FILES=()

if [[ ${#CUDA_FILES[@]} -eq 0 ]]; then
    echo "Warning: No CUDA files found in $LLAMA_DIR/ggml/src/ggml-cuda"
    echo "This may be expected for some llama.cpp versions"
else
    for file in "${CUDA_FILES[@]}"; do
        # Check if file contains the hardcoded masks
        if grep -Eq "$MASK_PATTERN" "$file" 2>/dev/null; then
            # Create backup
            cp "$file" "$file.backup"

            # Replace both uppercase and lowercase versions
            sed -i -E "s/${MASK_PATTERN}/GGML_HIP_WARP_MASK/g" "$file"

            MODIFIED_FILES+=("$file")
            echo " ✓ Modified: $(basename "$file")"
        fi
    done

    echo " ✓ Modified ${#MODIFIED_FILES[@]} CUDA files"
fi

echo ""
echo "Step 3: Verification..."

# Verify the vendor header was modified correctly
if grep -q "GGML_HIP_ROCWMMA_FATTN" "$VENDOR_HIP_FILE" && grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE"; then
    echo " ✓ Vendor header modification verified"
else
    echo " ✗ Vendor header modification failed"
    # Restore backup
    mv "$VENDOR_HIP_FILE.backup" "$VENDOR_HIP_FILE"
    echo " ✓ Restored original vendor header"
    # Also roll back the CUDA sources: they now reference a macro the header no
    # longer defines. The ${arr[@]+...} form tolerates an empty array under set -u.
    for file in ${MODIFIED_FILES[@]+"${MODIFIED_FILES[@]}"}; do
        mv "$file.backup" "$file"
    done
    echo " ✓ Restored ${#MODIFIED_FILES[@]} CUDA files"
    exit 1
fi

echo ""
echo "🎉 rocWMMA compatibility fixes applied successfully!"
echo ""
echo "What was changed:"
echo " • Added conditional GGML_HIP_WARP_MASK macro to ggml/src/ggml-cuda/vendors/hip.h"
echo " • Replaced hardcoded 0xFFFFFFFF/0xffffffff with GGML_HIP_WARP_MASK in CUDA files"
echo ""
echo "Behavior:"
echo " • For regular HIP builds: GGML_HIP_WARP_MASK = 0xFFFFFFFF (no change)"
echo " • For rocWMMA builds: GGML_HIP_WARP_MASK = 0xFFFFFFFFFFFFFFFFULL (64-bit masks)"
echo ""
echo "To build with rocWMMA support, use:"
echo " cmake -B build -S '$LLAMA_DIR' -DGGML_HIP=ON -DAMDGPU_TARGETS=\"gfx1151\" -DGGML_HIP_ROCWMMA_FATTN=ON"
echo ""
echo "Backup files were created with .backup extension in case you need to revert."
echo ""
echo "Done! Your llama.cpp checkout now supports rocWMMA builds."
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/bash

# build-rocwaam.sh - Clone, configure, build and install rocWMMA for gfx1151.
#
# Requires ROCM_PATH in the environment: it is used both to locate the
# amdclang/amdclang++ compilers ($ROCM_PATH/llvm/bin) and as the install prefix.
# The checkout is created as ./rocWMMA relative to the current directory.

# Stop at the first failing step instead of continuing in the wrong directory.
set -euo pipefail

: "${ROCM_PATH:?ROCM_PATH must be set to the ROCm installation prefix}"

# Reuse an existing checkout so the script can be re-run.
if [[ ! -d rocWMMA ]]; then
    git clone https://github.com/ROCm/rocWMMA
fi
cd rocWMMA

# NOTE(review): the original script had a "Change FP8 check from FAIL to STATUS"
# marker here with no accompanying command; nothing is patched at this point.

rm -rf build
mkdir build
CC="$ROCM_PATH/llvm/bin/amdclang" CXX="$ROCM_PATH/llvm/bin/amdclang++" \
    cmake -B build . -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX="$ROCM_PATH" \
        -DROCWMMA_BUILD_TESTS=OFF \
        -DROCWMMA_BUILD_SAMPLES=OFF \
        -DGPU_TARGETS="gfx1151"

cmake --build build -j"$(nproc)"

# Escalate only when not already root: container builds run as root and may not
# have sudo installed.
if [[ "$(id -u)" -eq 0 ]]; then
    cmake --install build
else
    sudo cmake --install build
fi
|
||||||
Reference in New Issue
Block a user