diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fa8135e0763..c7985bdf11e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -106,7 +106,6 @@ build/cuda101/nompi/clang/cuda_wo_omp/release/shared: variables: CXX_COMPILER: "clang++" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_HWLOC: "OFF" BUILD_TYPE: "Release" CUDA_ARCH: 35 @@ -124,7 +123,6 @@ build/cuda101/openmpi/gcc/all/debug/shared: BUILD_CUDA: "ON" BUILD_MPI: "ON" MPI_AS_ROOT: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Debug" BUILD_PAPI_SDE: "ON" CUDA_ARCH: 35 @@ -139,7 +137,6 @@ build/cuda101/nompi/clang/all/release/static: CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 @@ -190,7 +187,6 @@ build/cuda102/nompi/gcc/all/debug/shared: variables: BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Debug" FAST_TESTS: "ON" BUILD_HWLOC: "OFF" @@ -207,7 +203,6 @@ build/cuda102/nompi/clang/all/release/static: CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 @@ -425,25 +420,25 @@ build/amd/nompi/clang/rocm45/debug/shared: BUILD_HIP: "ON" BUILD_TYPE: "Debug" -# ROCm 5.0.2 and friends -build/amd/nompi/gcc/rocm502/debug/static: +# ROCm 5.1.4 and friends +build/amd/nompi/gcc/rocm514/debug/static: extends: - .build_and_test_template - .default_variables - .full_test_condition - - .use_gko-rocm502-nompi-gnu11-llvm11 + - .use_gko-rocm514-nompi-gnu11-llvm11 variables: BUILD_OMP: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" BUILD_SHARED_LIBS: "OFF" -build/amd/nompi/clang/rocm502/release/shared: +build/amd/nompi/clang/rocm514/release/shared: extends: - .build_and_test_template - .default_variables - .quick_test_condition - - .use_gko-rocm502-nompi-gnu11-llvm11 + - .use_gko-rocm514-nompi-gnu11-llvm11 variables: CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -451,12 +446,12 @@ build/amd/nompi/clang/rocm502/release/shared: BUILD_TYPE: "Release" # without omp 
-build/amd/nompi/gcc/rocm502_wo_omp/release/shared: +build/amd/nompi/gcc/rocm514_wo_omp/release/shared: extends: - .build_and_test_template - .default_variables - .full_test_condition - - .use_gko-rocm502-nompi-gnu11-llvm11 + - .use_gko-rocm514-nompi-gnu11-llvm11 variables: BUILD_OMP: "OFF" BUILD_MPI: "OFF" @@ -686,7 +681,6 @@ warnings: variables: BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" CXX_FLAGS: "-Werror=pedantic;-pedantic-errors" allow_failure: yes @@ -701,7 +695,6 @@ no-circular-deps: variables: BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" EXTRA_CMAKE_FLAGS: '-DGINKGO_CHECK_CIRCULAR_DEPS=on' allow_failure: no @@ -729,7 +722,6 @@ clang-tidy: variables: BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" EXTRA_CMAKE_FLAGS: '-DGINKGO_WITH_CLANG_TIDY=ON' allow_failure: yes diff --git a/.gitlab/image.yml b/.gitlab/image.yml index eb1ab5128af..2be565b6b10 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -74,8 +74,8 @@ - private_ci - amd-gpu -.use_gko-rocm502-nompi-gnu11-llvm11: - image: ginkgohub/rocm:502-openmpi-gnu11-llvm11 +.use_gko-rocm514-nompi-gnu11-llvm11: + image: ginkgohub/rocm:514-openmpi-gnu11-llvm11 tags: - private_ci - amd-gpu diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index b58d21de261..dda127ff535 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -106,7 +106,9 @@ - popd - if [ -n "${SYCL_DEVICE_TYPE}" ]; then unset SYCL_DEVICE_TYPE; fi - if [ -n "${SYCL_DEVICE_FILTER}" ]; then unset SYCL_DEVICE_FILTER; fi - - PKG_CONFIG_PATH=${INSTALL_PREFIX}/lib/pkgconfig:$PKG_CONFIG_PATH LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH ninja test_pkgconfig + - if [[ "${BUILD_SHARED_LIBS}" == "ON" || "${BUILD_HIP}" != "ON" ]]; then + PKG_CONFIG_PATH=${INSTALL_PREFIX}/lib/pkgconfig:$PKG_CONFIG_PATH LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH ninja test_pkgconfig; + fi dependencies: [] @@ -136,7 +138,9 @@ - pushd test/test_install - ninja install - popd - - 
PKG_CONFIG_PATH=${INSTALL_PREFIX}/lib/pkgconfig:$PKG_CONFIG_PATH LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH ninja test_pkgconfig + - if [[ "${BUILD_SHARED_LIBS}" == "ON" || "${BUILD_HIP}" != "ON" ]]; then + PKG_CONFIG_PATH=${INSTALL_PREFIX}/lib/pkgconfig:$PKG_CONFIG_PATH LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH ninja test_pkgconfig; + fi cache: [] diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b89f2dbf05..3e17446854e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,14 +52,6 @@ set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING "A list of target NVIDIA GPU architectures. See README.md for more detail.") # the details of fine/coarse grain memory and unsafe atomic are available https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations option(GINKGO_HIP_AMD_UNSAFE_ATOMIC "Compiler uses unsafe floating point atomic (only for AMD GPU and ROCM >= 5). Default is ON because we use hipMalloc, which is always on coarse grain. Must turn off when allocating memory on fine grain" ON) -set(GINKGO_HIP_COMPILER_FLAGS "" CACHE STRING - "Set the required HIP compiler flags. Current default is an empty string.") -set(GINKGO_HIP_NVCC_COMPILER_FLAGS "" CACHE STRING - "Set the required HIP nvcc compiler flags. Current default is an empty string.") -set(GINKGO_HIP_CLANG_COMPILER_FLAGS "" CACHE STRING - "Set the required HIP CLANG compiler flags. Current default is an empty string.") -set(GINKGO_HIP_AMDGPU "" CACHE STRING - "The amdgpu_target(s) variable passed to hipcc. The default is none (auto).") option(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS "Split template instantiations for slow-to-compile files. 
This improves parallel build performance" ON) mark_as_advanced(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS) option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF) @@ -144,12 +136,6 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) endif() -if(BUILD_SHARED_LIBS) - set(GINKGO_STATIC_OR_SHARED SHARED) -else() - set(GINKGO_STATIC_OR_SHARED STATIC) -endif() - # Ensure we have a debug postfix if(NOT DEFINED CMAKE_DEBUG_POSTFIX) set(CMAKE_DEBUG_POSTFIX "d") @@ -481,22 +467,26 @@ add_custom_target(test_exportbuild COMMAND ${GINKGO_TEST_EXPORTBUILD_CMD} COMMENT "Running a test on Ginkgo's exported build directory.") -add_custom_target(test_pkgconfig - COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${TOOLSET} - -S${GINKGO_TEST_PKGCONFIG_SRC_DIR} - -B${GINKGO_TEST_PKGCONFIG_BIN_DIR} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - # `--config cfg` is ignored by single-configuration generator. - # `$<CONFIG>` is always be the same as `CMAKE_BUILD_TYPE` in - # single-configuration generator. - COMMAND ${CMAKE_COMMAND} - --build ${GINKGO_TEST_PKGCONFIG_BIN_DIR} - --config $<CONFIG> - COMMAND ${GINKGO_TEST_PKGCONFIG_CMD} - COMMENT "Running a test on Ginkgo's PkgConfig" - "This requires installing Ginkgo first") +# static linking with pkg-config is not possible with HIP, since +# some linker information cannot be expressed in pkg-config files +if (BUILD_SHARED_LIBS OR NOT GINKGO_BUILD_HIP) + add_custom_target(test_pkgconfig + COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${TOOLSET} + -S${GINKGO_TEST_PKGCONFIG_SRC_DIR} + -B${GINKGO_TEST_PKGCONFIG_BIN_DIR} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + # `--config cfg` is ignored by single-configuration generator. 
+ # `$<CONFIG>` is always be the same as `CMAKE_BUILD_TYPE` in + # single-configuration generator. + COMMAND ${CMAKE_COMMAND} + --build ${GINKGO_TEST_PKGCONFIG_BIN_DIR} + --config $<CONFIG> + COMMAND ${GINKGO_TEST_PKGCONFIG_CMD} + COMMENT "Running a test on Ginkgo's PkgConfig" + "This requires installing Ginkgo first") +endif() # Setup CPack diff --git a/INSTALL.md b/INSTALL.md index 045d5f93a09..9719bdfb920 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -51,8 +51,8 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_BUILD_HIP={ON, OFF}` builds optimized HIP versions of the kernels (requires HIP), default is `ON` if an installation of HIP could be detected, `OFF` otherwise. -* `-DGINKGO_HIP_AMDGPU="gpuarch1;gpuarch2"` the amdgpu_target(s) variable - passed to hipcc for the `hcc` HIP backend. The default is none (auto). +* `-DCMAKE_HIP_ARCHITECTURES="gpuarch1;gpuarch2"` the AMDGPU targets to be passed to the compiler. + If empty, compiler chooses based on the available GPUs. * `-DGINKGO_BUILD_HWLOC={ON, OFF}` builds Ginkgo with HWLOC. Default is `OFF`. * `-DGINKGO_BUILD_DOC={ON, OFF}` creates an HTML version of Ginkgo's documentation from inline comments in the code. The default is `OFF`. @@ -181,22 +181,13 @@ imposed by the `HIP` tool suite. The variables are the following: #### HIP platform detection of AMD and NVIDIA -By default, Ginkgo uses the output of `/opt/rocm/hip/bin/hipconfig --platform` -to select the backend. The accepted values are either `hcc` (`amd` with ROCM >= -4.1) or `nvcc` (`nvidia` with ROCM >= 4.1). When on an AMD or NVIDIA system, -this should output the correct platform by default. When on a system without -GPUs, this should output `hcc` by default. To change this value, export the -environment variable `HIP_PLATFORM` like so: +Ginkgo relies on CMake to decide which compiler to use for HIP. 
+To choose `nvcc` instead of the default ROCm `clang++`, set the corresponding +environment variable: ```bash -export HIP_PLATFORM=nvcc # or nvidia for ROCM >= 4.1 +export HIPCXX=nvcc ``` - -#### Setting platform specific compilation flags -Platform specific compilation flags can be given through the following CMake -variables: -+ `-DGINKGO_HIP_COMPILER_FLAGS=`: compilation flags given to all platforms. -+ `-DGINKGO_HIP_NVCC_COMPILER_FLAGS=`: compilation flags given to NVIDIA platforms. -+ `-DGINKGO_HIP_CLANG_COMPILER_FLAGS=`: compilation flags given to AMD clang compiler. +Note that this option is currently not being tested in our CI pipelines. ### Third party libraries and packages diff --git a/README.md b/README.md index 49d5f23f300..f34582539a1 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ The Ginkgo HIP module has the following __additional__ requirements: * _AMD_ backend (using the `clang` compiler) * _10.1 <= CUDA < 11_ backend * if the hipFFT package is available, it is used to implement the FFT LinOps. 
+* _cmake 3.21+_ The Ginkgo DPC++(SYCL) module has the following __additional__ requirements: diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 6b8ce0687d6..ca209e65057 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -25,14 +25,9 @@ endfunction() function(ginkgo_benchmark_hipsparse_linops type def) add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp) + set_source_files_properties(utils/hip_linops.hip.cpp PROPERTIES LANGUAGE HIP) target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def}) - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) - set_target_properties(hipsparse_linops_${type} PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) - # use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks - target_compile_definitions(hipsparse_linops_${type} PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) - target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE - ${HSA_HEADER} ${HIP_INCLUDE_DIRS} - ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS}) + target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS}) target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES}) endfunction() @@ -126,10 +121,8 @@ if (GINKGO_BUILD_HIP) ginkgo_benchmark_hipsparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) ginkgo_benchmark_hipsparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) ginkgo_benchmark_hipsparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) + set_source_files_properties(utils/hip_timer.hip.cpp PROPERTIES LANGUAGE HIP) add_library(hip_timer utils/hip_timer.hip.cpp) - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) - set_target_properties(hip_timer PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) - target_include_directories(hip_timer SYSTEM PRIVATE ${HSA_HEADER} ${HIP_INCLUDE_DIRS}) 
target_link_libraries(hip_timer ginkgo) endif() diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 0776801aa99..23b1d25adc1 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -63,16 +63,10 @@ set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) set(GINKGO_CUDA_ARCHITECTURES "@CMAKE_CUDA_ARCHITECTURES@") set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") -set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@") -set(GINKGO_HIP_HCC_COMPILER_FLAGS "@GINKGO_HIP_HCC_COMPILER_FLAGS@") -set(GINKGO_HIP_NVCC_COMPILER_FLAGS "@GINKGO_HIP_NVCC_COMPILER_FLAGS@") -set(GINKGO_HIP_CLANG_COMPILER_FLAGS "@GINKGO_HIP_CLANG_COMPILER_FLAGS@") -set(GINKGO_HIP_PLATFORM @GINKGO_HIP_PLATFORM@) -set(GINKGO_HIP_PLATFORM_AMD_REGEX "@HIP_PLATFORM_AMD_REGEX@") -set(GINKGO_HIP_PLATFORM_NVIDIA_REGEX "@HIP_PLATFORM_NVIDIA_REGEX@") -set(GINKGO_HIP_AMDGPU "@GINKGO_HIP_AMDGPU@") -set(GINKGO_HIP_VERSION @GINKGO_HIP_VERSION@) -set(GINKGO_AMD_ARCH_FLAGS "@GINKGO_AMD_ARCH_FLAGS@") +set(GINKGO_HIP_COMPILER_FLAGS "@CMAKE_HIP_COMPILER_FLAGS@") +set(GINKGO_HIP_PLATFORM "@GINKGO_HIP_PLATFORM@") +set(GINKGO_HIP_VERSION "@GINKGO_HIP_VERSION@") +set(GINKGO_HIP_ARCHITECTURES "@CMAKE_HIP_ARCHITECTURES@") set(GINKGO_DPCPP_VERSION @GINKGO_DPCPP_VERSION@) set(GINKGO_DPCPP_MAJOR_VERSION @GINKGO_DPCPP_MAJOR_VERSION@) @@ -175,7 +169,7 @@ if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_CUDA) endif() if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_HIP) - find_dependency(HIP) + enable_language(HIP) find_dependency(hipblas) find_dependency(hipfft) find_dependency(hiprand) @@ -217,6 +211,9 @@ if(GINKGO_BUILD_CUDA) _ginkgo_check_compiler(CUDA) _ginkgo_check_compiler(CUDA_HOST) endif() +if(GINKGO_BUILD_HIP) + _ginkgo_check_compiler(HIP) +endif() include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake) diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 757262f1ea1..d3ad2e3a6a1 100644 --- 
a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -31,7 +31,8 @@ if (NOT DEFINED GINKGO_BUILD_CUDA) endif() if (NOT DEFINED GINKGO_BUILD_HIP) - if(GINKGO_HIPCONFIG_PATH) + check_language(HIP) + if(CMAKE_HIP_COMPILER) message(STATUS "Enabling HIP executor") set(GINKGO_HAS_HIP ON) endif() diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index 34189a09450..a1a1735f84e 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -19,6 +19,10 @@ endfunction() function(ginkgo_compile_features name) target_compile_features("${name}" PUBLIC cxx_std_14) + # we set these properties regardless of the enabled backends, + # because unknown properties are ignored + set_target_properties("${name}" PROPERTIES HIP_STANDARD 14) + set_target_properties("${name}" PROPERTIES CUDA_STANDARD 14) if(GINKGO_WITH_CLANG_TIDY AND GINKGO_CLANG_TIDY_PATH) set_property(TARGET "${name}" PROPERTY CXX_CLANG_TIDY "${GINKGO_CLANG_TIDY_PATH};-checks=*") endif() @@ -93,17 +97,12 @@ function(ginkgo_check_headers target defines) list(APPEND HIP_SOURCES "${HEADER_SOURCEFILE}") endforeach() if(HIP_SOURCES) - set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) - hip_add_library(${target}_headers_hip ${HIP_SOURCES}) # the compiler options get set by linking to ginkgo_hip + set_source_files_properties(${HIP_SOURCES} PROPERTIES LANGUAGE HIP) + add_library(${target}_headers_hip ${HIP_SOURCES}) # the compiler options get set by linking to ginkgo_hip target_link_libraries(${target}_headers_hip PRIVATE ${target} roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) target_include_directories(${target}_headers_hip PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}" - "${GINKGO_HIP_THRUST_PATH}" - "${HIPBLAS_INCLUDE_DIRS}" - "${hiprand_INCLUDE_DIRS}" - "${HIPSPARSE_INCLUDE_DIRS}" - "${ROCPRIM_INCLUDE_DIRS}") + "${CMAKE_CURRENT_SOURCE_DIR}") endif() endfunction() diff --git a/cmake/build_type_helpers.cmake 
b/cmake/build_type_helpers.cmake index 09fc4a7dc9b..0c6b386bb21 100644 --- a/cmake/build_type_helpers.cmake +++ b/cmake/build_type_helpers.cmake @@ -93,7 +93,7 @@ foreach(_LANG IN LISTS ENABLED_LANGUAGES ITEMS "HIP") set(${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED FALSE) endif() if(${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED) - if(_LANG STREQUAL "HIP" AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") + if(_LANG STREQUAL "HIP" AND GINKGO_HIP_PLATFORM_NVIDIA) set(CMAKE_${_LANG}_FLAGS_${_TYPE} ${${PROJECT_NAME}_NVCC_${_TYPE}_COMPILER_FLAGS} CACHE STRING "Flags used by the ${_LANG} compiler during ${_TYPE} builds." FORCE diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index c2f5c1fb94f..ecb75d5da39 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -33,6 +33,10 @@ function(ginkgo_set_test_target_properties test_target_name test_library_suffix) target_link_libraries(${test_target_name} PRIVATE ginkgo_gtest_main${test_library_suffix}) endif() target_compile_features(${test_target_name} PUBLIC cxx_std_14) + # we set these properties regardless of the enabled backends, + # because unknown properties are ignored + set_target_properties(${test_target_name} PROPERTIES HIP_STANDARD 14) + set_target_properties(${test_target_name} PROPERTIES CUDA_STANDARD 14) target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} ${set_properties_ADDITIONAL_INCLUDES}) target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES}) endfunction() @@ -104,7 +108,7 @@ function(ginkgo_add_test test_name test_target_name) if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_CUDA) set(test_preload $:${test_preload}) endif() - if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM_AMD) set(test_preload $:${test_preload}) endif() if(test_preload) @@ 
-167,55 +171,14 @@ endfunction(ginkgo_create_cuda_test_internal) ## Test compiled with HIP function(ginkgo_create_hip_test test_name) ginkgo_build_test_name(${test_name} test_target_name) - ginkgo_create_hip_test_internal(${test_name} ${test_name}.hip.cpp ${test_target_name} "" ${ARGN}) + ginkgo_create_hip_test_internal(${test_name} ${test_name}.hip.cpp ${test_target_name} ${ARGN}) endfunction(ginkgo_create_hip_test) ## Internal function allowing separate filename, test name and test target name. -function(ginkgo_create_hip_test_internal test_name filename test_target_name additional_flags) - set_source_files_properties(${filename} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) - set(GINKGO_TEST_HIP_DEFINES -DGKO_COMPILING_HIP ${additional_flags}) - if (GINKGO_FAST_TESTS) - list(APPEND GINKGO_TEST_HIP_DEFINES -DGINKGO_FAST_TESTS) - endif() - if (GINKGO_TEST_NONDEFAULT_STREAM) - list(APPEND GINKGO_TEST_HIP_DEFINES -DGKO_TEST_NONDEFAULT_STREAM) - endif() - - # NOTE: With how HIP works, passing the flags `HIPCC_OPTIONS` etc. here - # creates a redefinition of all flags. This creates some issues with `nvcc`, - # but `clang` seems fine with the redefinitions. - if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - hip_add_executable(${test_target_name} ${filename} - # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and - # call gets fixed, uncomment this. 
- HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} - # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} - # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} - --expt-relaxed-constexpr --expt-extended-lambda - ) - else() # hcc/clang - hip_add_executable(${test_target_name} ${filename} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - ) - endif() - - # Let's use a normal compiler for linking - set_target_properties(${test_target_name} PROPERTIES LINKER_LANGUAGE CXX) - - target_include_directories(${test_target_name} - PRIVATE - # Only `math` requires it so far, but it's much easier - # to put these this way. - ${GINKGO_HIP_THRUST_PATH} - # Only `exception_helpers` requires these so far, but it's much easier - # to put these this way. - ${HIPBLAS_INCLUDE_DIRS} - ${HIPFFT_INCLUDE_DIRS} - ${hiprand_INCLUDE_DIRS} - ${HIPSPARSE_INCLUDE_DIRS} - ) +function(ginkgo_create_hip_test_internal test_name filename test_target_name) + set_source_files_properties(${filename} PROPERTIES LANGUAGE HIP) + add_executable(${test_target_name} ${filename}) + target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_HIP) ginkgo_set_test_target_properties(${test_target_name} "_hip" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE hipgpu) endfunction(ginkgo_create_hip_test_internal) @@ -317,7 +280,8 @@ function(ginkgo_create_common_device_test test_name) if(GINKGO_BUILD_HIP) # need to make a separate file for this, since we can't set conflicting properties on the same file configure_file(${test_name}.cpp ${test_name}.hip.cpp COPYONLY) - ginkgo_create_hip_test_internal(${test_name}_hip ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.hip.cpp ${test_target_name}_hip "-std=c++14;-DEXEC_TYPE=HipExecutor;-DEXEC_NAMESPACE=hip" ${ARGN}) + ginkgo_create_hip_test_internal(${test_name}_hip 
${CMAKE_CURRENT_BINARY_DIR}/${test_name}.hip.cpp ${test_target_name}_hip ${ARGN}) + target_compile_definitions(${test_target_name}_hip PRIVATE EXEC_TYPE=HipExecutor EXEC_NAMESPACE=hip) endif() endfunction(ginkgo_create_common_device_test) diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 72a7a3a86d8..327375bfe76 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -1,26 +1,17 @@ -if(DEFINED ENV{HIP_PLATFORM}) - set(GINKGO_HIP_PLATFORM "$ENV{HIP_PLATFORM}") -elseif(GINKGO_HIPCONFIG_PATH) - execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH} - --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM) +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +enable_language(HIP) +if(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA") + set(GINKGO_HIP_PLATFORM "nvidia") + set(GINKGO_HIP_PLATFORM_NVIDIA ON) + set(GINKGO_HIP_PLATFORM_AMD OFF) + set(GINKGO_HIP_PLATFORM_NVCC 1) else() - message(FATAL_ERROR "No platform could be found for HIP. " - "Set and export the environment variable HIP_PLATFORM.") -endif() -message(STATUS "HIP platform set to ${GINKGO_HIP_PLATFORM}") -set(HIP_PLATFORM_AMD_REGEX "hcc|amd") -set(HIP_PLATFORM_NVIDIA_REGEX "nvcc|nvidia") - -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + set(GINKGO_HIP_PLATFORM "amd") + set(GINKGO_HIP_PLATFORM_NVIDIA OFF) + set(GINKGO_HIP_PLATFORM_AMD ON) set(GINKGO_HIP_PLATFORM_HCC 1) -elseif (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - enable_language(CUDA) - set(GINKGO_HIP_PLATFORM_NVCC 1) endif() -if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21) - set(CMAKE_HIP_ARCHITECTURES OFF) -endif() if(NOT DEFINED ROCM_PATH) if(DEFINED ENV{ROCM_PATH}) @@ -88,24 +79,6 @@ if(NOT DEFINED ROCTRACER_PATH) endif() endif() -# Find HIPCC_CMAKE_LINKER_HELPER executable -find_program( - HIP_HIPCC_CMAKE_LINKER_HELPER - NAMES hipcc_cmake_linker_helper - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH -) -if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER) - # Now search in 
default paths - find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper) -endif() - find_program( HIP_HIPCONFIG_EXECUTABLE NAMES hipconfig @@ -130,21 +103,10 @@ execute_process( ERROR_STRIP_TRAILING_WHITESPACE ) -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - # ensure ENV{CUDA_PATH} is set by the user - if (NOT DEFINED ENV{CUDA_PATH}) - find_path(GINKGO_HIP_DEFAULT_CUDA_PATH "cuda.h" PATH /usr/local/cuda/include NO_DEFAULT_PATH) - if (NOT GINKGO_HIP_DEFAULT_CUDA_PATH) - message(FATAL_ERROR "HIP nvidia backend was requested but CUDA could not be " - "located. Set and export the environment variable CUDA_PATH.") - endif() - endif() -endif() - ## Setup all CMAKE variables to find HIP and its dependencies set(GINKGO_HIP_MODULE_PATH "${HIP_PATH}/cmake") list(APPEND CMAKE_MODULE_PATH "${GINKGO_HIP_MODULE_PATH}") -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") +if (GINKGO_HIP_PLATFORM_AMD) list(APPEND CMAKE_PREFIX_PATH "${HIP_PATH}/lib/cmake") endif() list(APPEND CMAKE_PREFIX_PATH @@ -155,15 +117,6 @@ list(APPEND CMAKE_PREFIX_PATH "${ROCRAND_PATH}/lib/cmake" ) -# NOTE: without this, HIP jacobi build takes a *very* long time. The reason for -# that is that these variables are seemingly empty by default, thus there is no -# proper optimization applied to the HIP builds otherwise. 
-set(HIP_HIPCC_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the HIPCC compiler during DEBUG builds") -set(HIP_HIPCC_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL}" CACHE STRING "Flags used by the HIPCC compiler during MINSIZEREL builds") -set(HIP_HIPCC_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the HIPCC compiler during RELEASE builds") -set(HIP_HIPCC_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the HIPCC compiler during RELWITHDEBINFO builds") - -find_package(HIP REQUIRED) find_package(hipblas REQUIRED) find_package(hipfft) # optional dependency find_package(hiprand REQUIRED) @@ -171,47 +124,8 @@ find_package(hipsparse REQUIRED) # At the moment, for hiprand to work also rocrand is required. find_package(rocrand REQUIRED) find_package(ROCTX) -find_path(GINKGO_HIP_THRUST_PATH "thrust/complex.h" - PATHS "${HIP_PATH}/../include" - ENV HIP_THRUST_PATH) -if (NOT GINKGO_HIP_THRUST_PATH) - message(FATAL_ERROR "Could not find the ROCm header thrust/complex.h which is required by Ginkgo HIP.") -endif() - -set(GINKGO_HIP_NVCC_ARCH "") -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - if (CMAKE_CUDA_HOST_COMPILER) - list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") - endif() - - # Remove false positive CUDA warnings when calling one() and zero() - list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS --expt-relaxed-constexpr --expt-extended-lambda) - - # select GPU architecture - include(cmake/Modules/CudaArchitectureSelector.cmake) - cas_variable_cuda_architectures(GINKGO_HIP_NVCC_ARCH - ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} - UNSUPPORTED "20" "21") -endif() - -# `target_compile_options` do not work with hip_add_library -# Thus, we need to pass the flags to `hip_add_library` itself -if(GINKGO_HIP_AMDGPU) - foreach(target ${GINKGO_HIP_AMDGPU}) - list(APPEND GINKGO_AMD_ARCH_FLAGS --amdgpu-target=${target}) - endforeach() -endif() - 
-set(GINKGO_HIPCC_OPTIONS ${GINKGO_HIP_COMPILER_FLAGS} "-std=c++14 -DGKO_COMPILING_HIP") -set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ARCH} ${GINKGO_HIP_NVCC_ADDITIONAL_FLAGS}) -set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS} ${GINKGO_AMD_ARCH_FLAGS}) -if(GINKGO_HIP_AMD_UNSAFE_ATOMIC AND HIP_VERSION VERSION_GREATER_EQUAL 5) - list(APPEND GINKGO_HIP_CLANG_OPTIONS "-munsafe-fp-atomics -Wno-unused-command-line-argument") +if(GINKGO_HIP_AMD_UNSAFE_ATOMIC AND GINKGO_HIP_VERSION VERSION_GREATER_EQUAL 5) + set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -munsafe-fp-atomics -Wno-unused-command-line-argument") endif() -# HIP's cmake support secretly carries around global state to remember -# whether we created any shared libraries, and sets PIC flags accordingly. -# CMake's scoping rules means that this makes the hip_add_* calls order- and -# scope-dependent, which is not good. Let's set the flags ourselves instead. -list(APPEND GINKGO_HIP_CLANG_OPTIONS "-fPIC") -list(APPEND GINKGO_HIP_NVCC_OPTIONS "--shared -Xcompiler '-fPIC'") +set(CMAKE_HIP_STANDARD 14) diff --git a/cmake/hip_path.cmake b/cmake/hip_path.cmake index 58fcd3db447..a9f418cb3bd 100644 --- a/cmake/hip_path.cmake +++ b/cmake/hip_path.cmake @@ -7,11 +7,6 @@ if(NOT DEFINED HIP_PATH) endif() endif() -find_program(GINKGO_HIPCONFIG_PATH hipconfig HINTS "${HIP_PATH}/bin") -if(GINKGO_HIPCONFIG_PATH) - message(STATUS "Found hipconfig: ${GINKGO_HIPCONFIG_PATH}") -endif() - # We keep using NVCC/HCC for consistency with previous releases even if AMD # updated everything to use NVIDIA/AMD in ROCM 4.1 set(GINKGO_HIP_PLATFORM_NVCC 0) diff --git a/cmake/information_helpers.cmake b/cmake/information_helpers.cmake index 04687dfae5b..0e667a90f02 100644 --- a/cmake/information_helpers.cmake +++ b/cmake/information_helpers.cmake @@ -76,7 +76,9 @@ macro(ginkgo_interface_libraries_recursively INTERFACE_LIBS) # Populate the compiler options and definitions if needed 
get_target_property(GINKGO_LIBS_INTERFACE_DEFS "${_lib}" INTERFACE_COMPILE_DEFINITIONS) if (GINKGO_LIBS_INTERFACE_DEFS) - list(APPEND GINKGO_INTERFACE_CFLAGS_FOUND "${GINKGO_LIBS_INTERFACE_DEFS}") + foreach(def IN LISTS GINKGO_LIBS_INTERFACE_DEFS) + list(APPEND GINKGO_INTERFACE_CFLAGS_FOUND "-D${def}") + endforeach() endif() unset(GINKGO_LIBS_INTERFACE_DEFS) diff --git a/cmake/install_helpers.cmake b/cmake/install_helpers.cmake index 1e48c991e8a..898988142c9 100644 --- a/cmake/install_helpers.cmake +++ b/cmake/install_helpers.cmake @@ -59,19 +59,23 @@ function(ginkgo_install_library name) endfunction() function(ginkgo_install) - # generate pkg-config file, a three-step process is necessary to include the correct install prefix - # Step 1: substitute project variables in the generation script - configure_file("${Ginkgo_SOURCE_DIR}/cmake/generate_pkg.cmake.in" - "${Ginkgo_BINARY_DIR}/cmake/generate_pkg.cmake" - @ONLY) - # Step 2: substitute generator expressions - file(GENERATE OUTPUT ${Ginkgo_BINARY_DIR}/cmake/generate_pkg_$.cmake - INPUT ${Ginkgo_BINARY_DIR}/cmake/generate_pkg.cmake) - # Step 3: at install time, call the generation script which has all variables - # except the install prefix already replaced. 
Use the install prefix - # that is specified at install time - install(SCRIPT "${Ginkgo_BINARY_DIR}/cmake/generate_pkg_$.cmake" - COMPONENT Ginkgo_Development) + # static linking with pkg-config is not possible with HIP, since + # some linker information cannot be expressed in pkg-config files + if (BUILD_SHARED_LIBS OR NOT GINKGO_BUILD_HIP) + # generate pkg-config file, a three-step process is necessary to include the correct install prefix + # Step 1: substitute project variables in the generation script + configure_file("${Ginkgo_SOURCE_DIR}/cmake/generate_pkg.cmake.in" + "${Ginkgo_BINARY_DIR}/cmake/generate_pkg.cmake" + @ONLY) + # Step 2: substitute generator expressions + file(GENERATE OUTPUT ${Ginkgo_BINARY_DIR}/cmake/generate_pkg_$.cmake + INPUT ${Ginkgo_BINARY_DIR}/cmake/generate_pkg.cmake) + # Step 3: at install time, call the generation script which has all variables + # except the install prefix already replaced. Use the install prefix + # that is specified at install time + install(SCRIPT "${Ginkgo_BINARY_DIR}/cmake/generate_pkg_$.cmake" + COMPONENT Ginkgo_Development) + endif() # install the public header files install(DIRECTORY "${Ginkgo_SOURCE_DIR}/include/" diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 449b8da4584..1b5f9237612 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -116,10 +116,6 @@ ginkgo_compile_features(ginkgo) add_library(Ginkgo::ginkgo ALIAS ginkgo) target_link_libraries(ginkgo PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) - -# The PAPI dependency needs to be exposed to the user. 
-set(GKO_RPATH_ADDITIONS "") - if(GINKGO_HAVE_PAPI_SDE) target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE) endif() @@ -140,14 +136,8 @@ if(GINKGO_BUILD_MPI) target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX) endif() -# Since we have a public dependency on HIP, this dependency appears -# here as well -if(GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") - list(APPEND GKO_RPATH_ADDITIONS "${HIP_PATH}/lib") -endif() - ginkgo_default_includes(ginkgo) -ginkgo_install_library(ginkgo "${GKO_RPATH_ADDITIONS}") +ginkgo_install_library(ginkgo) if(GINKGO_CHECK_CIRCULAR_DEPS) ginkgo_check_headers(ginkgo "") diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index ad106e123bc..f82df149b0e 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,3 +1,4 @@ +cmake_minimum_required(VERSION 3.21) include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. matrix/csr_kernels.instantiate.hip.cpp CSR_INSTANTIATE) add_instantiation_files(. matrix/fbcsr_kernels.instantiate.hip.cpp FBCSR_INSTANTIATE) @@ -70,11 +71,7 @@ else() list(APPEND GINKGO_HIP_SOURCES matrix/fft_kernels_stub.hip.cpp) endif() -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - set(GKO_HIP_JACOBI_MAX_BLOCK_SIZE 32) -else() - set(GKO_HIP_JACOBI_MAX_BLOCK_SIZE 64) -endif() +set(GKO_HIP_JACOBI_MAX_BLOCK_SIZE 64) if(GINKGO_JACOBI_FULL_OPTIMIZATIONS) set(GKO_HIP_JACOBI_BLOCK_SIZES) foreach(blocksize RANGE 1 ${GKO_HIP_JACOBI_MAX_BLOCK_SIZE}) @@ -94,6 +91,16 @@ foreach(GKO_JACOBI_BLOCK_SIZE IN LISTS GKO_HIP_JACOBI_BLOCK_SIZES) configure_file( preconditioner/jacobi_advanced_apply_instantiate.inc.hip.cpp preconditioner/jacobi_advanced_apply_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp) + # The 3D indexing used in Jacobi kernel triggers an instruction selection bug in Debug builds + # Probably the same as https://github.com/llvm/llvm-project/issues/67574 + # Fixed in ROCm 6.0 
https://github.com/ROCm/llvm-project/commit/cd7f574a1fd1d3f3e8b9c1cae61fa8133a51de5f + # and in LLVM trunk https://github.com/llvm/llvm-project/commit/cc3d2533cc2e4ea06981b86ede5087fbf801e789 + set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/preconditioner/jacobi_generate_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp + ${CMAKE_CURRENT_BINARY_DIR}/preconditioner/jacobi_simple_apply_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp + ${CMAKE_CURRENT_BINARY_DIR}/preconditioner/jacobi_advanced_apply_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp + PROPERTIES + COMPILE_OPTIONS $<$:-O2>) list(APPEND GINKGO_HIP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/preconditioner/jacobi_generate_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp ${CMAKE_CURRENT_BINARY_DIR}/preconditioner/jacobi_simple_apply_instantiate.${GKO_JACOBI_BLOCK_SIZE}.hip.cpp @@ -102,27 +109,17 @@ endforeach() string(REPLACE ";" "," GKO_HIP_JACOBI_BLOCK_SIZES_CODE "${GKO_HIP_JACOBI_BLOCK_SIZES}") configure_file(preconditioner/jacobi_common.hip.hpp.in preconditioner/jacobi_common.hip.hpp) -set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) -hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - ${GINKGO_STATIC_OR_SHARED}) +set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES LANGUAGE HIP) +add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES}) target_include_directories(ginkgo_hip - PUBLIC - ${HIP_INCLUDE_DIRS} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/.. 
# for generated headers like jacobi_common.hip.hpp - ${GINKGO_HIP_THRUST_PATH} - ${HIPBLAS_INCLUDE_DIRS} - ${HIPFFT_INCLUDE_DIRS} - ${hiprand_INCLUDE_DIRS} - ${HIPSPARSE_INCLUDE_DIRS} - $) + ) +target_compile_definitions(ginkgo_hip PRIVATE GKO_COMPILING_HIP) target_link_libraries(ginkgo_hip PUBLIC ginkgo_device) -target_link_libraries(ginkgo_hip PRIVATE roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) +target_link_libraries(ginkgo_hip PRIVATE hip::host roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) if (hipfft_FOUND) target_link_libraries(ginkgo_hip PRIVATE hip::hipfft) endif() @@ -130,40 +127,12 @@ if (GINKGO_HAVE_ROCTX) target_link_libraries(ginkgo_hip PRIVATE roc::roctx) endif() -if(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") - find_package(hip REQUIRED) - # To save a bit of pain, we directly link against the `library` instead of - # linking against the target. - if (CMAKE_BUILD_TYPE) - # Check if our configuration is available first - string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPER_BUILD_TYPE) - get_target_property(HIP_LIBAMDHIP64_LIBRARIES hip::amdhip64 IMPORTED_LOCATION_${UPPER_BUILD_TYPE}) - endif() - if (NOT HIP_LIBAMDHIP64_LIBRARIES) - # Fall back to anything - get_target_property(HIP_LIBAMDHIP64_LIBRARIES hip::amdhip64 LOCATION) - endif() - target_link_libraries(ginkgo_hip PUBLIC ${HIP_LIBAMDHIP64_LIBRARIES}) -elseif(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - find_package(CUDA 10.1 REQUIRED) - target_link_libraries(ginkgo_hip PUBLIC ${CUDA_LIBRARIES}) -endif() +target_compile_options(ginkgo_hip PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) -# Try to find everything in /opt/rocm/lib first. 
-set(GKO_HIP_RPATH "${ROCM_PATH}/lib" ) -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - list(GET CUDA_LIBRARIES 0 CUDA_FIRST_LIB) - get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY) - list(APPEND GKO_HIP_RPATH "${GKO_CUDA_LIBDIR}") -else() - list(APPEND GKO_HIP_RPATH "${HIP_PATH}/lib") -endif() -list(APPEND GKO_HIP_RPATH "${HIPBLAS_PATH}/lib" "${HIPRAND_PATH}/lib" - "${HIPSPARSE_PATH}/lib" "${ROCRAND_PATH}/lib") ginkgo_compile_features(ginkgo_hip) ginkgo_default_includes(ginkgo_hip) -ginkgo_install_library(ginkgo_hip "${GKO_HIP_RPATH}") +ginkgo_install_library(ginkgo_hip) if (GINKGO_CHECK_CIRCULAR_DEPS) ginkgo_check_headers(ginkgo_hip GKO_COMPILING_HIP) diff --git a/hip/get_info.cmake b/hip/get_info.cmake index 1610ac0eee4..14a770234fa 100644 --- a/hip/get_info.cmake +++ b/hip/get_info.cmake @@ -1,15 +1,6 @@ ginkgo_print_module_header(${detailed_log} "HIP") -ginkgo_print_foreach_variable(${detailed_log} - "GINKGO_HIPCONFIG_PATH;GINKGO_HIP_AMDGPU" - "GINKGO_HIP_CLANG_COMPILER_FLAGS;GINKGO_HIP_NVCC_COMPILER_FLAGS" - "GINKGO_HIP_THRUST_PATH;GINKGO_AMD_ARCH_FLAGS") ginkgo_print_module_footer(${detailed_log} "HIP variables:") -ginkgo_print_foreach_variable(${detailed_log} - "HIP_VERSION;HIP_COMPILER;HIP_PATH;ROCM_PATH" - "HIP_PLATFORM;HIP_ROOT_DIR;HIP_RUNTIME;HIPBLAS_PATH;HIPSPARSE_PATH" - "HIPRAND_PATH;ROCRAND_PATH;HIP_CLANG_INCLUDE_PATH;HIP_CLANG_PATH" - "HIP_HIPCC_EXECUTABLE;HIP_HIPCONFIG_EXECUTABLE;HIP_HOST_COMPILATION_CPP") -ginkgo_print_flags(${detailed_log} "HIP_HIPCC_FLAGS") -ginkgo_print_flags(${detailed_log} "HIP_NVCC_FLAGS") -ginkgo_print_flags(${detailed_log} "HIP_CLANG_FLAGS") +ginkgo_print_flags(${detailed_log} "CMAKE_HIP_FLAGS") +ginkgo_print_flags(${detailed_log} "CMAKE_HIP_COMPILER") +ginkgo_print_variable(${detailed_log} "CMAKE_HIP_COMPILER_VERSION") ginkgo_print_module_footer(${detailed_log} "") diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 11b0e209aeb..ae29eb782f1 100644 --- 
a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -9,14 +9,12 @@ if(GINKGO_HAVE_HWLOC) endif() endif() ginkgo_create_hip_test(kernel_launch) -# correct flags for kernel_launch.hpp are set in GINKGO_HIPCC_OPTIONS ginkgo_create_test(lin_op RESOURCE_TYPE hipgpu) ginkgo_create_hip_test(math) ginkgo_create_test(memory RESOURCE_TYPE hipgpu) -# Only hcc needs the libraries. nvcc only requires the headers. -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") - ginkgo_create_hip_test(exception_helpers ADDITIONAL_LIBRARIES roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) -else() - ginkgo_create_hip_test(exception_helpers) +set(additional_libs) +if(hipfft_FOUND) + set(additional_libs hip::hipfft) endif() +ginkgo_create_hip_test(exception_helpers ADDITIONAL_LIBRARIES roc::hipblas roc::hipsparse hip::hiprand roc::rocrand ${additional_libs}) ginkgo_create_hip_test(scoped_device_id) diff --git a/test/matrix/fft_kernels.cpp b/test/matrix/fft_kernels.cpp index d5ba741a064..ed186b1df60 100644 --- a/test/matrix/fft_kernels.cpp +++ b/test/matrix/fft_kernels.cpp @@ -149,6 +149,9 @@ TYPED_TEST(Fft, ApplyStrided1DInverseIsEqualToReference) TYPED_TEST(Fft, Apply2DIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT 5.1 has a bug related to 2D FFT"; +#endif using T = typename TestFixture::value_type; this->fft2->apply(this->data, this->out); @@ -160,6 +163,9 @@ TYPED_TEST(Fft, Apply2DIsEqualToReference) TYPED_TEST(Fft, ApplyStrided2DIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT 5.1 has a bug related to 2D FFT"; +#endif using T = typename TestFixture::value_type; this->fft2->apply(this->data_strided, this->out_strided); @@ -171,6 +177,9 @@ TYPED_TEST(Fft, ApplyStrided2DIsEqualToReference) TYPED_TEST(Fft, Apply2DInverseIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT 5.1 has a bug related 
to 2D FFT"; +#endif using T = typename TestFixture::value_type; this->ifft2->apply(this->data, this->out); @@ -182,6 +191,9 @@ TYPED_TEST(Fft, Apply2DInverseIsEqualToReference) TYPED_TEST(Fft, ApplyStrided2DInverseIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT 5.1 has a bug related to 2D FFT"; +#endif using T = typename TestFixture::value_type; this->ifft2->apply(this->data_strided, this->out_strided); diff --git a/test/test_install/CMakeLists.txt b/test/test_install/CMakeLists.txt index 513af67e923..ee19b8d030e 100644 --- a/test/test_install/CMakeLists.txt +++ b/test/test_install/CMakeLists.txt @@ -38,39 +38,22 @@ if(GINKGO_BUILD_CUDA) enable_language(CUDA) configure_file(test_install.cpp test_install.cu COPYONLY) add_executable(test_install_cuda ${CMAKE_CURRENT_BINARY_DIR}/test_install.cu) + set_target_properties(test_install_cuda PROPERTIES CUDA_STANDARD 14) target_compile_definitions(test_install_cuda PRIVATE HAS_CUDA=1) target_compile_definitions(test_install_cuda PRIVATE HAS_REFERENCE=${HAS_REFERENCE}) target_link_libraries(test_install_cuda PRIVATE Ginkgo::ginkgo) endif() if(GINKGO_BUILD_HIP) - find_package(HIP REQUIRED) - - set_source_files_properties(test_install.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) - # The library was compiled with `-fPIC` when in shared library mode - if (GINKGO_BUILD_SHARED_LIBS) - set (GINKGO_PIC_OPTION "-fPIC") - set (GINKGO_CUDA_PIC_OPTION "-Xcompiler '-fPIC'") - else() - set (GINKGO_PIC_OPTION "$<$:-fPIC>") - endif() - if (CMAKE_CUDA_HOST_COMPILER) - set(TESTINSTALL_CUDA_HOST_COMPILER "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") - endif() - hip_add_executable(test_install_hip test_install.cpp - HIPCC_OPTIONS "-std=c++14" - CLANG_OPTIONS "${GINKGO_PIC_OPTION}" - NVCC_OPTIONS "${GINKGO_CUDA_PIC_OPTION}" "${TESTINSTALL_CUDA_HOST_COMPILER}") + enable_language(HIP) + configure_file(test_install.cpp test_install.hip.cpp COPYONLY) + 
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/test_install.hip.cpp PROPERTIES LANGUAGE HIP) + add_executable(test_install_hip ${CMAKE_CURRENT_BINARY_DIR}/test_install.hip.cpp) + set_target_properties(test_install_hip PROPERTIES HIP_STANDARD 14) target_link_libraries(test_install_hip PRIVATE Ginkgo::ginkgo) target_compile_definitions(test_install_hip PRIVATE HAS_HIP=1) target_compile_definitions(test_install_hip PRIVATE HAS_REFERENCE=${HAS_REFERENCE}) - - # If we always link with CXX there is no RPATH issue - set_target_properties(test_install_hip PROPERTIES LINKER_LANGUAGE CXX) - - # Instead of using CXX, it's possible to instead use - # target_link_libraries(test_install_hip PRIVATE ${GINKGO_INSTALL_RPATH_FOR_HIP}) endif() # Installation step diff --git a/third_party/identify_stream_usage/CMakeLists.txt b/third_party/identify_stream_usage/CMakeLists.txt index c20fe43278b..cb316205953 100644 --- a/third_party/identify_stream_usage/CMakeLists.txt +++ b/third_party/identify_stream_usage/CMakeLists.txt @@ -23,26 +23,25 @@ if(GINKGO_BUILD_CUDA) ENVIRONMENT LD_PRELOAD=$) endif() -if(GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") +if(GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM_AMD) find_package(hip REQUIRED) set_source_files_properties(identify_stream_usage.hip.cpp test_default_stream_identification.hip.cpp - PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) - hip_add_library(identify_stream_usage_hip - identify_stream_usage.hip.cpp - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - SHARED) - target_link_libraries(identify_stream_usage_hip PUBLIC hip::amdhip64 ${CMAKE_DL_LIBS}) + PROPERTIES LANGUAGE HIP) + add_library(identify_stream_usage_hip SHARED identify_stream_usage.hip.cpp) + target_link_libraries(identify_stream_usage_hip ${CMAKE_DL_LIBS}) + set_target_properties( + identify_stream_usage_hip + PROPERTIES # set target compile options + CXX_STANDARD 17 + 
CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) - hip_add_executable(test_stream_identification_hip - test_default_stream_identification.hip.cpp - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS}) + add_executable(test_stream_identification_hip + test_default_stream_identification.hip.cpp) add_test(NAME default_stream_identification_hip COMMAND test_stream_identification_hip) set_tests_properties(default_stream_identification_hip PROPERTIES ENVIRONMENT LD_PRELOAD=$) -endif() \ No newline at end of file +endif()