further fix for rocWMMA

This commit is contained in:
Donato Capitella
2025-10-11 23:05:29 +01:00
parent f62739d243
commit e1f5aaa85c
3 changed files with 72 additions and 13 deletions
+15 -9
View File
@@ -1,11 +1,11 @@
# build stage
FROM registry.fedoraproject.org/fedora:rawhide AS builder
# rocm 6.4.6 repo
# rocm 6.4.4 repo
RUN <<'EOF'
tee /etc/yum.repos.d/rocm.repo <<REPO
[ROCm-6.4.6]
name=ROCm6.4.6
[ROCm-6.4.4]
name=ROCm6.4.4
baseurl=https://repo.radeon.com/rocm/el9/6.4.4/main
enabled=1
priority=50
@@ -18,9 +18,9 @@ EOF
# Install the build toolchain (make, gcc, cmake, clang, lld), the ROCm/HIP
# packages and a few utilities in a single layer. Docs and weak dependencies
# are skipped, packages matching *sdk*, *samples*, *-doc* and *-docs* are
# excluded, and the dnf cache is cleaned in the same RUN.
RUN dnf -y --nodocs --setopt=install_weak_deps=False \
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel ninja-build \
rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \
rocblas rocblas-devel hipblas hipblas-devel \
rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \
rocminfo radeontop \
git-core vim sudo rsync \
&& dnf clean all && rm -rf /var/cache/dnf/*
@@ -32,20 +32,26 @@ ENV ROCM_PATH=/opt/rocm \
HIP_DEVICE_LIB_PATH=/opt/rocm/amdgcn/bitcode \
PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH
# rocWMMA
# rocWMMA
WORKDIR /opt
RUN git clone -b release/rocm-rel-7.1 https://github.com/ROCm/rocWMMA.git
RUN sudo mkdir -p /usr/include/rocwmma
RUN sudo cp -r rocWMMA/library/include/rocwmma /usr/include/
COPY ./build-rocwmma.sh .
RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
# llama.cpp
WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git .
# Apply rocWMMA patch
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
# build
RUN git clean -xdf \
&& git pull \
&& git submodule update --recursive \
&& echo -e '#ifndef HIP_HAS_SHFL_SYNC_FUNCS\n#define HIP_HAS_SHFL_SYNC_FUNCS\n#ifndef __shfl_sync\n#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width)\n#endif\n#ifndef __shfl_xor_sync\n#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width)\n#endif\n#ifndef __shfl_up_sync\n#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width)\n#endif\n#endif\n' \
| cat - /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh > /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \
&& mv /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \
&& cmake -S . -B build \
-DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \
+3 -1
View File
@@ -150,4 +150,6 @@ echo " cmake -B build -S '$LLAMA_DIR' -DGGML_HIP=ON -DAMDGPU_TARGETS=\"gfx1151\
echo ""
echo "Backup files were created with .backup extension in case you need to revert."
echo ""
echo "Done! Your llama.cpp checkout now supports rocWMMA builds."
echo "Done! Your llama.cpp checkout now supports rocWMMA builds."
+54 -3
View File
@@ -2,9 +2,60 @@
git clone https://github.com/ROCm/rocWMMA
cd rocWMMA
# Change FP8 check from FAIL to STATUS
# --- BEGIN: make OpenMP explicit for ROCm toolchains (drop-in) ---
# Locate an OpenMP runtime library (libomp). Candidates are probed in the
# order listed: the ROCm LLVM tree under ROCM_PATH first, then system paths.
# NOTE(review): ROCM_PATH is expanded without a default; if it is unset the
# first two entries become /llvm/lib/libomp.* -- confirm it is always exported.
CANDIDATES=(
"${ROCM_PATH}/llvm/lib/libomp.so"
"${ROCM_PATH}/llvm/lib/libomp.a"
"/usr/lib64/libomp.so"
"/usr/lib64/libomp.a"
"/usr/local/lib/libomp.so"
)
# The first candidate that exists as a regular file wins; FOUND_LIBOMP stays
# empty when none of them exists.
FOUND_LIBOMP=""
for p in "${CANDIDATES[@]}"; do
if [ -f "$p" ]; then
FOUND_LIBOMP="$p"
break
fi
done
rm -rf build; mkdir build; CC=$ROCM_PATH/llvm/bin/amdclang CXX=$ROCM_PATH/llvm/bin/amdclang++ cmake -B build . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCWMMA_BUILD_TESTS=OFF -DROCWMMA_BUILD_SAMPLES=OFF -DGPU_TARGETS="gfx1151"
# Accumulate extra -D cache options describing OpenMP for the CMake configure
# step, and export matching compiler/linker flags through the environment.
# NOTE(review): no cmake invocation visible in this view references
# $CMAKE_OPTS -- confirm it is actually passed to the configure command.
CMAKE_OPTS=""
if [ -n "$FOUND_LIBOMP" ]; then
# directory & basename
OMP_LIB_DIR="$(dirname "$FOUND_LIBOMP")"
# NOTE(review): OMP_LIB_BASENAME is not read anywhere in the visible lines.
OMP_LIB_BASENAME="$(basename "$FOUND_LIBOMP")"
# set cache vars so FindOpenMP will succeed
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_FLAGS=-fopenmp=libomp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_C_FLAGS=-fopenmp=libomp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_LIB_NAMES=omp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_C_LIB_NAMES=omp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_LIBRARY=${FOUND_LIBOMP}"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_INCLUDE_DIR=${ROCM_PATH}/llvm/include"
# Prepend the libomp directory to the loader path; the ':' separator is only
# emitted when LD_LIBRARY_PATH already has a value.
export LD_LIBRARY_PATH="${OMP_LIB_DIR}${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH"
# Prepend the OpenMP compile and link flags, keeping any pre-existing
# CXXFLAGS/LDFLAGS after them.
export CXXFLAGS="-fopenmp=libomp ${CXXFLAGS:-}"
export LDFLAGS="-L${OMP_LIB_DIR} -lomp ${LDFLAGS:-}"
else
# fallback: force flags so FindOpenMP might at least get flags
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_FLAGS=-fopenmp=libomp -DOpenMP_C_FLAGS=-fopenmp=libomp"
export CXXFLAGS="-fopenmp=libomp ${CXXFLAGS:-}"
# No library directory is known here, so -lomp is appended without a -L path.
export LDFLAGS="${LDFLAGS:-} -lomp"
fi
# --- END: make OpenMP explicit ---
cmake --build build -j$(nproc)
# Configure rocWMMA into ./build with the Ninja generator, using the ROCm
# amdclang/amdclang++ compilers: Release build, install prefix $ROCM_PATH,
# tests and samples disabled, gfx1151 as the GPU target. OpenMP is described
# to CMake by hand through the OpenMP_* cache variables.
# NOTE(review): the libomp path (/usr/lib64/libomp.so) and the clang include
# directory (/usr/lib64/clang/19/include) are hard-coded here and do not use
# FOUND_LIBOMP -- confirm both paths exist in the build environment.
CC=$ROCM_PATH/llvm/bin/amdclang \
CXX=$ROCM_PATH/llvm/bin/amdclang++ \
cmake -B build -S . -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$ROCM_PATH \
-DROCWMMA_BUILD_TESTS=OFF \
-DROCWMMA_BUILD_SAMPLES=OFF \
-DGPU_TARGETS="gfx1151" \
-DOpenMP_CXX_FLAGS="-fopenmp=libomp" \
-DOpenMP_C_FLAGS="-fopenmp=libomp" \
-DOpenMP_omp_LIBRARY="/usr/lib64/libomp.so" \
-DOpenMP_CXX_LIB_NAMES="omp" \
-DOpenMP_C_LIB_NAMES="omp" \
-DOpenMP_INCLUDE_DIRS="/usr/lib64/clang/19/include"
cmake --install build
sudo cmake --install build