From 4dcb2946de4d2936349acddeeee2102e06d579fa Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Thu, 2 Mar 2023 18:43:05 -0500 Subject: [PATCH] Use KOKKOS_ARCH_NVIDIA_GPU macro in SYCL, OpenACC, and OpenMPTarget backends where appropriate --- core/src/OpenACC/Kokkos_OpenACC_Traits.hpp | 3 +-- core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp | 7 ++----- core/src/SYCL/Kokkos_SYCL.cpp | 5 +---- core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp | 10 ++-------- 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp b/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp index c8a6dfec6f..97d34d19a3 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp @@ -22,8 +22,7 @@ namespace Kokkos::Experimental::Impl { struct OpenACC_Traits { -#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_AMPERE) || defined(KOKKOS_ARCH_HOPPER) +#if defined(KOKKOS_ARCH_NVIDIA_GPU) static constexpr acc_device_t dev_type = acc_device_nvidia; static constexpr bool may_fallback_to_host = false; #else diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index 564f299ab5..abe1dad73d 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -93,13 +93,10 @@ void OpenMPTargetInternal::impl_initialize() { // FIXME_OPENMPTARGET: Only fix the number of teams for NVIDIA architectures // from Pascal and upwards. -#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) -#if defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300) +#if defined(KOKKOS_ARCH_NVIDIA_GPU) && defined(KOKKOS_COMPILER_CLANG) && \ + (KOKKOS_COMPILER_CLANG >= 1300) omp_set_num_teams(512); #endif -#endif } int OpenMPTargetInternal::impl_is_initialized() { return m_is_initialized ? 1 : 0; diff --git a/core/src/SYCL/Kokkos_SYCL.cpp b/core/src/SYCL/Kokkos_SYCL.cpp index c665631dd6..72facc856b 100644 --- a/core/src/SYCL/Kokkos_SYCL.cpp +++ b/core/src/SYCL/Kokkos_SYCL.cpp @@ -128,10 +128,7 @@ void SYCL::impl_initialize(InitializationSettings const& settings) { // If the device id is not specified and there are no GPUs, sidestep Kokkos // device selection and use whatever is available (if no GPU architecture is // specified). -#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_ARCH_KEPLER) && \ - !defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL) && \ - !defined(KOKKOS_ARCH_VOLTA) && !defined(KOKKOS_ARCH_TURING75) && \ - !defined(KOKKOS_ARCH_AMPERE) && !defined(KOKKOS_ARCH_HOPPER) +#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_ARCH_NVIDIA_GPU) if (!settings.has_device_id() && gpu_devices.empty()) { Impl::SYCLInternal::singleton().initialize(sycl::device()); Impl::SYCLInternal::m_syclDev = 0; diff --git a/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp b/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp index 59e9a7d515..be9a384c78 100644 --- a/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp +++ b/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp @@ -304,10 +304,7 @@ class TeamPolicyInternal return std::min({ int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize), // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs. -#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \ - defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) +#if defined(KOKKOS_ARCH_NVIDIA_GPU) 256, #endif max_threads_for_memory @@ -337,10 +334,7 @@ class TeamPolicyInternal return std::min({ int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize), // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs. -#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \ - defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) +#if defined(KOKKOS_ARCH_NVIDIA_GPU) 256, #endif max_threads_for_memory