further fix for rocWMMA

This commit is contained in:
Donato Capitella
2025-10-11 23:05:29 +01:00
parent f62739d243
commit e1f5aaa85c
3 changed files with 72 additions and 13 deletions
+14 -8
View File
@@ -1,11 +1,11 @@
# build stage # build stage
FROM registry.fedoraproject.org/fedora:rawhide AS builder FROM registry.fedoraproject.org/fedora:rawhide AS builder
# rocm 6.4.6 repo # rocm 6.4.4 repo
RUN <<'EOF' RUN <<'EOF'
tee /etc/yum.repos.d/rocm.repo <<REPO tee /etc/yum.repos.d/rocm.repo <<REPO
[ROCm-6.4.6] [ROCm-6.4.4]
name=ROCm6.4.6 name=ROCm6.4.4
baseurl=https://repo.radeon.com/rocm/el9/6.4.4/main baseurl=https://repo.radeon.com/rocm/el9/6.4.4/main
enabled=1 enabled=1
priority=50 priority=50
@@ -18,9 +18,9 @@ EOF
RUN dnf -y --nodocs --setopt=install_weak_deps=False \ RUN dnf -y --nodocs --setopt=install_weak_deps=False \
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \ --exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
install \ install \
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ make gcc cmake lld clang clang-devel compiler-rt libcurl-devel ninja-build \
rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \ rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \
rocblas rocblas-devel hipblas hipblas-devel \ rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \
rocminfo radeontop \ rocminfo radeontop \
git-core vim sudo rsync \ git-core vim sudo rsync \
&& dnf clean all && rm -rf /var/cache/dnf/* && dnf clean all && rm -rf /var/cache/dnf/*
@@ -34,18 +34,24 @@ ENV ROCM_PATH=/opt/rocm \
# rocWMMA # rocWMMA
WORKDIR /opt WORKDIR /opt
RUN git clone -b release/rocm-rel-7.1 https://github.com/ROCm/rocWMMA.git COPY ./build-rocwmma.sh .
RUN sudo mkdir -p /usr/include/rocwmma RUN chmod +x build-rocwmma.sh && ./build-rocwmma.sh
RUN sudo cp -r rocWMMA/library/include/rocwmma /usr/include/
# llama.cpp # llama.cpp
WORKDIR /opt/llama.cpp WORKDIR /opt/llama.cpp
RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git . RUN git clone --recursive https://github.com/ggerganov/llama.cpp.git .
# Apply # rocWMMA patch
COPY ./apply-rocwmma-fix.sh /opt/apply-rocwmma-fix.sh
RUN chmod +x /opt/apply-rocwmma-fix.sh && /opt/apply-rocwmma-fix.sh /opt/llama.cpp
# build # build
RUN git clean -xdf \ RUN git clean -xdf \
&& git pull \ && git pull \
&& git submodule update --recursive \ && git submodule update --recursive \
&& echo -e '#ifndef HIP_HAS_SHFL_SYNC_FUNCS\n#define HIP_HAS_SHFL_SYNC_FUNCS\n#ifndef __shfl_sync\n#define __shfl_sync(mask,var,srcLane,width) __shfl(var,srcLane,width)\n#endif\n#ifndef __shfl_xor_sync\n#define __shfl_xor_sync(mask,var,laneMask,width) __shfl_xor(var,laneMask,width)\n#endif\n#ifndef __shfl_up_sync\n#define __shfl_up_sync(mask,var,delta,width) __shfl_up(var,delta,width)\n#endif\n#endif\n' \
| cat - /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh > /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp \
&& mv /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh.tmp /opt/llama.cpp/ggml/src/ggml-cuda/mma.cuh \
&& cmake -S . -B build \ && cmake -S . -B build \
-DGGML_HIP=ON \ -DGGML_HIP=ON \
-DAMDGPU_TARGETS=gfx1151 \ -DAMDGPU_TARGETS=gfx1151 \
+2
View File
@@ -151,3 +151,5 @@ echo ""
echo "Backup files were created with .backup extension in case you need to revert." echo "Backup files were created with .backup extension in case you need to revert."
echo "" echo ""
echo "Done! Your llama.cpp checkout now supports rocWMMA builds." echo "Done! Your llama.cpp checkout now supports rocWMMA builds."
+54 -3
View File
@@ -2,9 +2,60 @@
git clone https://github.com/ROCm/rocWMMA git clone https://github.com/ROCm/rocWMMA
cd rocWMMA cd rocWMMA
# Change FP8 check from FAIL to STATUS # --- BEGIN: make OpenMP explicit for ROCm toolchains (drop-in) ---
# find libomp (check ROCM_PATH first, then system)
CANDIDATES=(
"${ROCM_PATH}/llvm/lib/libomp.so"
"${ROCM_PATH}/llvm/lib/libomp.a"
"/usr/lib64/libomp.so"
"/usr/lib64/libomp.a"
"/usr/local/lib/libomp.so"
)
FOUND_LIBOMP=""
for p in "${CANDIDATES[@]}"; do
if [ -f "$p" ]; then
FOUND_LIBOMP="$p"
break
fi
done
rm -rf build; mkdir build; CC=$ROCM_PATH/llvm/bin/amdclang CXX=$ROCM_PATH/llvm/bin/amdclang++ cmake -B build . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCWMMA_BUILD_TESTS=OFF -DROCWMMA_BUILD_SAMPLES=OFF -DGPU_TARGETS="gfx1151" CMAKE_OPTS=""
if [ -n "$FOUND_LIBOMP" ]; then
# directory & basename
OMP_LIB_DIR="$(dirname "$FOUND_LIBOMP")"
OMP_LIB_BASENAME="$(basename "$FOUND_LIBOMP")"
# set cache vars so FindOpenMP will succeed
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_FLAGS=-fopenmp=libomp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_C_FLAGS=-fopenmp=libomp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_LIB_NAMES=omp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_C_LIB_NAMES=omp"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_LIBRARY=${FOUND_LIBOMP}"
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_INCLUDE_DIR=${ROCM_PATH}/llvm/include"
export LD_LIBRARY_PATH="${OMP_LIB_DIR}${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH"
export CXXFLAGS="-fopenmp=libomp ${CXXFLAGS:-}"
export LDFLAGS="-L${OMP_LIB_DIR} -lomp ${LDFLAGS:-}"
else
# fallback: force flags so FindOpenMP might at least get flags
CMAKE_OPTS="${CMAKE_OPTS} -DOpenMP_CXX_FLAGS=-fopenmp=libomp -DOpenMP_C_FLAGS=-fopenmp=libomp"
export CXXFLAGS="-fopenmp=libomp ${CXXFLAGS:-}"
export LDFLAGS="${LDFLAGS:-} -lomp"
fi
# --- END: make OpenMP explicit ---
cmake --build build -j$(nproc) CC=$ROCM_PATH/llvm/bin/amdclang \
CXX=$ROCM_PATH/llvm/bin/amdclang++ \
cmake -B build -S . -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$ROCM_PATH \
-DROCWMMA_BUILD_TESTS=OFF \
-DROCWMMA_BUILD_SAMPLES=OFF \
-DGPU_TARGETS="gfx1151" \
-DOpenMP_CXX_FLAGS="-fopenmp=libomp" \
-DOpenMP_C_FLAGS="-fopenmp=libomp" \
-DOpenMP_omp_LIBRARY="/usr/lib64/libomp.so" \
-DOpenMP_CXX_LIB_NAMES="omp" \
-DOpenMP_C_LIB_NAMES="omp" \
-DOpenMP_INCLUDE_DIRS="/usr/lib64/clang/19/include"
cmake --install build
sudo cmake --install build sudo cmake --install build