从源码编译安装 AMD 的 GPU BLAS 库 rocBLAS
安装 ROCm。
·
1,预备环境
安装 ROCm(下文命令假定其安装在 /opt/rocm,并使用 /opt/rocm/bin/hipcc 作为编译器)
2,下载源代码
$ git clone --recursive git@github.com:ROCm/rocBLAS.git
### 若未配置 GitHub 的 SSH 密钥,可改用 HTTPS 方式: git clone --recursive https://github.com/ROCm/rocBLAS.git
cd rocBLAS
git checkout rocm-6.0.2
3, 编译 Debug 版本
$ conda deactivate
$ conda deactivate
$ conda deactivate
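先在 rocBLAS 源码根目录下创建并进入一个两级的构建目录(例如 mkdir -p build/debug && cd build/debug),因为下面的 cmake 命令以 ../../ 作为源码目录。
开启 cmake 变量 CMAKE_VERBOSE_MAKEFILE,显示编译命令细节: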
export CXX=/opt/rocm/bin/hipcc \
&& cmake -DCMAKE_INSTALL_PREFIX=../../../local_d_noT_906_908_1030/rocblas/ \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_RULE_MESSAGES:BOOL=OFF \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DCMAKE_Fortran_COMPILER="gfortran" \
-DBUILD_CLIENTS_TESTS:BOOL=ON \
-DBUILD_CLIENTS_BENCHMARKS:BOOL=ON \
-DBUILD_CLIENTS_SAMPLES:BOOL=ON \
-DBUILD_WITH_TENSILE=OFF \
../../
$ make -j
编译成果:
仅开启 -DCMAKE_VERBOSE_MAKEFILE=ON(不附加其他选项)时的配置命令:
export CXX=/opt/rocm/bin/hipcc \
&& cmake -DCMAKE_INSTALL_PREFIX=../../../local_d_cmake/rocblas/ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_VERBOSE_MAKEFILE=ON ../../
记录一条编译 cpp 文件的核心命令(Debug 版本;下面为便于阅读按参数分行,行尾没有续行符,不能直接粘贴执行):
/opt/rocm/bin/hipcc
-DROCBLAS_BETA_FEATURES_API
-DROCBLAS_INTERNAL_API
-DROCM_USE_FLOAT16
-DUSE_PROF_API=1
-D__HIP_PLATFORM_AMD__=1
-D__STDC_WANT_IEC_60559_TYPES_EXT__
-Drocblas_EXPORTS
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/include/internal
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/debug/include/rocblas/internal
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/debug/include/rocblas
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/debug/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/blas3/Tensile
-D__HIP_HCC_COMPAT_MODE__=1
-O1 -g -gz -ggdb
-std=c++17
-fPIC
-fvisibility=hidden
-fvisibility-inlines-hidden
-Wno-unused-result
-mf16c
-Werror=vla
-x hip
--offload-arch=gfx900
--offload-arch=gfx906:xnack-
--offload-arch=gfx908:xnack-
--offload-arch=gfx90a:xnack+
--offload-arch=gfx90a:xnack-
--offload-arch=gfx940
--offload-arch=gfx941
--offload-arch=gfx942
--offload-arch=gfx1010
--offload-arch=gfx1012
--offload-arch=gfx1030
--offload-arch=gfx1100
--offload-arch=gfx1101
--offload-arch=gfx1102
-MD -MT library/src/CMakeFiles/rocblas.dir/blas_ex/rocblas_axpy_ex.cpp.o
-MF CMakeFiles/rocblas.dir/blas_ex/rocblas_axpy_ex.cpp.o.d
-o CMakeFiles/rocblas.dir/blas_ex/rocblas_axpy_ex.cpp.o
-c /home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/blas_ex/rocblas_axpy_ex.cpp
修改 CMakeLists.txt,只编译 gfx906 和 gfx941 两种架构的二进制:
export CXX=/opt/rocm/bin/hipcc \
&& cmake -DCMAKE_INSTALL_PREFIX=../../../local_d_notensile_906_941/rocblas/ \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_RULE_MESSAGES:BOOL=OFF \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
../../
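4, 编译 Release 版本
注意:下面命令的安装前缀 ../../../local_d_cmake/rocblas/ 与上文仅开启 VERBOSE 的 Debug 配置所用前缀相同;若两者都执行安装,后安装的会覆盖先安装的,如需同时保留请为 Release 换一个前缀。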
export CXX=/opt/rocm/bin/hipcc \
&& cmake -DCMAKE_INSTALL_PREFIX=../../../local_d_cmake/rocblas/ -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE=ON ../../
记录一条编译 cpp 文件的核心命令(Release 版本;下面为便于阅读按参数分行,行尾没有续行符,不能直接粘贴执行):
/opt/rocm/bin/hipcc
-DROCBLAS_BETA_FEATURES_API
-DROCBLAS_INTERNAL_API
-DROCM_USE_FLOAT16
-DUSE_PROF_API=1
-D__HIP_PLATFORM_AMD__=1
-D__STDC_WANT_IEC_60559_TYPES_EXT__
-Drocblas_EXPORTS
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/include/internal
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/release/include/rocblas/internal
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/release/include/rocblas
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/build_no_ten_cmake/release/include
-I/home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/blas3/Tensile
-D__HIP_HCC_COMPAT_MODE__=1
-O3
-DNDEBUG
-std=c++17
-fPIC
-fvisibility=hidden
-fvisibility-inlines-hidden
-Wno-unused-result
-mf16c
-Werror=vla
-x hip
--offload-arch=gfx900
--offload-arch=gfx906:xnack-
--offload-arch=gfx908:xnack-
--offload-arch=gfx90a:xnack+
--offload-arch=gfx90a:xnack-
--offload-arch=gfx940
--offload-arch=gfx941
--offload-arch=gfx942
--offload-arch=gfx1010
--offload-arch=gfx1012
--offload-arch=gfx1030
--offload-arch=gfx1100
--offload-arch=gfx1101
--offload-arch=gfx1102
-MD -MT library/src/CMakeFiles/rocblas.dir/blas_ex/rocblas_nrm2_batched_ex.cpp.o
-MF CMakeFiles/rocblas.dir/blas_ex/rocblas_nrm2_batched_ex.cpp.o.d
-o CMakeFiles/rocblas.dir/blas_ex/rocblas_nrm2_batched_ex.cpp.o
-c /home/hipper/rocsolver_on_nv_ex/icla_blas/rocBLAS/library/src/blas_ex/rocblas_nrm2_batched_ex.cpp
更多推荐
已为社区贡献5条内容
所有评论(0)