From 92970cd43547eca63886fff36f7a525e61d2fae3 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 9 Jun 2021 19:40:57 +0200 Subject: [PATCH 01/11] add the issue test for cuda/hip reset collision --- .gitlab-ci.yml | 6 +-- .gitlab/image.yml | 3 +- cuda/test/base/CMakeLists.txt | 1 + cuda/test/base/cuda_executor_reset.cu | 65 ++++++++++++++++++++++++ hip/test/base/CMakeLists.txt | 1 + hip/test/base/hip_executor_reset.hip.cpp | 65 ++++++++++++++++++++++++ 6 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 cuda/test/base/cuda_executor_reset.cu create mode 100644 hip/test/base/hip_executor_reset.hip.cpp diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8bfdcf7e8e8..2cfb0faf44f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -315,9 +315,9 @@ build/cuda91/clang/all/release/shared: # cuda 9.2 and friends build/cuda92/gcc/all/release/shared: - <<: *default_build + <<: *default_build_with_test extends: - - .full_test_condition + - .quick_test_condition - .use_gko-cuda92-gnu7-llvm50-intel2017 variables: <<: *default_variables @@ -325,7 +325,7 @@ build/cuda92/gcc/all/release/shared: BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Release" - CUDA_ARCH: 35 + CUDA_ARCH: 61 # cuda 10.0 and friends # Make sure that our jobs run when using self-installed diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 5ea3889d8df..a2afe0fcd53 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -30,8 +30,7 @@ image: ginkgohub/cuda:92-gnu7-llvm50-intel2017 tags: - private_ci - - controller - - cpu + - nvidia-gpu .use_gko-cuda100-gnu7-llvm60-intel2018: image: ginkgohub/cuda:100-gnu7-llvm60-intel2018 diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index cedc2d8d328..bc9124a30bf 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) +ginkgo_create_cuda_test(cuda_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology NUMA::NUMA) diff --git a/cuda/test/base/cuda_executor_reset.cu b/cuda/test/base/cuda_executor_reset.cu new file mode 100644 index 00000000000..b63318b9afc --- /dev/null +++ b/cuda/test/base/cuda_executor_reset.cu @@ -0,0 +1,65 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +namespace { + + +#define GTEST_ASSERT_NO_EXIT(statement) \ + ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + +TEST(DeviceReset, HipCuda) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto hip = gko::HipExecutor::create(0, ref, true); + auto cuda = gko::CudaExecutor::create(0, ref, true); + }); +} + + +TEST(DeviceReset, CudaHip) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto cuda = gko::CudaExecutor::create(0, ref, true); + auto hip = gko::HipExecutor::create(0, ref, true); + }); +} + + +} // namespace diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index c2f694fdf8a..14640d2fb2d 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_hip_test(hip_executor) +ginkgo_create_hip_test(hip_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology NUMA::NUMA) diff --git a/hip/test/base/hip_executor_reset.hip.cpp b/hip/test/base/hip_executor_reset.hip.cpp new file mode 100644 index 00000000000..b63318b9afc --- /dev/null +++ b/hip/test/base/hip_executor_reset.hip.cpp @@ -0,0 +1,65 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +namespace { + + +#define GTEST_ASSERT_NO_EXIT(statement) \ + ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + +TEST(DeviceReset, HipCuda) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto hip = gko::HipExecutor::create(0, ref, true); + auto cuda = gko::CudaExecutor::create(0, ref, true); + }); +} + + +TEST(DeviceReset, CudaHip) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto cuda = gko::CudaExecutor::create(0, ref, true); + auto hip = gko::HipExecutor::create(0, ref, true); + }); +} + + +} // namespace From be4d94181887a5f3f09e362b754e06c075c0808d Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 9 Jun 2021 21:07:38 +0200 Subject: [PATCH 02/11] add device --- core/device_hooks/CMakeLists.txt | 5 ++ cuda/CMakeLists.txt | 4 +- cuda/base/executor.cpp | 3 + devices/CMakeLists.txt | 1 + devices/cuda/executor.cpp | 6 -- devices/device.cpp | 54 ++++++++++++++++++ devices/hip/executor.cpp | 6 -- dpcpp/CMakeLists.txt | 2 +- hip/CMakeLists.txt | 2 +- hip/base/executor.hip.cpp | 3 + include/ginkgo/core/base/device.hpp | 82 +++++++++++++++++++++++++++ include/ginkgo/core/base/executor.hpp | 45 +++++++++------ include/ginkgo/ginkgo.hpp | 1 + omp/CMakeLists.txt | 2 +- reference/CMakeLists.txt | 2 +- 15 files changed, 182 insertions(+), 36 deletions(-) create mode 100644 devices/device.cpp create mode 100644 include/ginkgo/core/base/device.hpp diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index fcb370a81a0..a3d00014025 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -1,6 +1,7 @@ if(NOT GINKGO_BUILD_CUDA) add_library(ginkgo_cuda $ + $ cuda_hooks.cpp) target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_cuda) @@ -11,6 +12,7 @@ endif() if (NOT GINKGO_BUILD_DPCPP) add_library(ginkgo_dpcpp $ + $ dpcpp_hooks.cpp) target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_dpcpp) @@ -21,6 +23,7 @@ endif() if(NOT GINKGO_BUILD_HIP) add_library(ginkgo_hip $ + $ hip_hooks.cpp) target_link_libraries(ginkgo_hip PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_hip) @@ -31,6 +34,7 @@ endif() if (NOT GINKGO_BUILD_OMP) add_library(ginkgo_omp $ + $ omp_hooks.cpp) ginkgo_compile_features(ginkgo_omp) target_link_libraries(ginkgo_omp PRIVATE ginkgo_cuda) @@ -44,6 +48,7 @@ endif() if (NOT GINKGO_BUILD_REFERENCE) add_library(ginkgo_reference $ + $ reference_hooks.cpp) target_link_libraries(ginkgo_reference PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_reference) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 6f89ca45f79..0a45231aae1 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -64,8 +64,8 @@ find_library(CUSPARSE cusparse find_library(CURAND curand HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) -add_library(ginkgo_cuda $ "") -set(GKO_CUDA_COMMON_SOURCES +add_library(ginkgo_cuda $ $ "") +set(GKO_CUDA_COMMON_SOURCES ../common/components/precision_conversion.cpp ../common/matrix/dense_kernels.cpp ../common/solver/bicg_kernels.cpp diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 0c7bbef8f10..38bffb3aa10 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -64,6 +65,8 @@ std::shared_ptr CudaExecutor::create( alloc_mode), [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); + std::lock_guard guard( + device_type::mutex[device_id]); delete exec; if (!CudaExecutor::get_num_execs(device_id) && device_reset) { cuda::device_guard g(device_id); diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 0c0e80febaf..85632345dc2 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -17,6 +17,7 @@ function(ginkgo_add_library name) endfunction() ginkgo_add_library(ginkgo_device machine_topology.cpp) +ginkgo_add_object_library(ginkgo_device_mutex device.cpp) ginkgo_install_library(ginkgo_device) add_subdirectory(cuda) diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index c1efe6e7d40..abdc93a4a08 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -64,10 +64,4 @@ bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const } -unsigned CudaExecutor::num_execs[max_devices]; - - -std::mutex CudaExecutor::mutex[max_devices]; - - } // namespace gko diff --git a/devices/device.cpp b/devices/device.cpp new file mode 100644 index 00000000000..b178de0777f --- /dev/null +++ b/devices/device.cpp @@ -0,0 +1,54 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include + + +#include + + +namespace gko { + + +std::recursive_mutex NvidiaDevice::mutex[max_devices]; + +int NvidiaDevice::num_execs[max_devices]; + + +std::recursive_mutex AmdDevice::mutex[max_devices]; + +int AmdDevice::num_execs[max_devices]; + + +} // namespace gko diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index 31325fcd15f..0e755209f91 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -61,10 +61,4 @@ bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const } -int HipExecutor::num_execs[max_devices]; - - -std::mutex HipExecutor::mutex[max_devices]; - - } // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index e2d476164e8..0289fd889e1 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -9,7 +9,7 @@ set(GINKGO_DPCPP_VERSION ${GINKGO_DPCPP_VERSION} PARENT_SCOPE) find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}") set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE) -add_library(ginkgo_dpcpp $ "") +add_library(ginkgo_dpcpp $ $ "") target_sources(ginkgo_dpcpp PRIVATE base/version.dp.cpp diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 61870d03aa7..3d98ec4acd5 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -239,7 +239,7 @@ set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS} ${GINKGO_AMD_ARCH_FLAGS}) set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) -hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} +hip_add_library(ginkgo_hip $ $ ${GINKGO_HIP_SOURCES} HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 1e43fe96549..cdb22ce7ee2 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -63,6 +64,8 @@ std::shared_ptr HipExecutor::create( new HipExecutor(device_id, std::move(master), device_reset, alloc_mode), [device_id](HipExecutor *exec) { auto device_reset = exec->get_device_reset(); + std::lock_guard guard( + device_type::mutex[device_id]); delete exec; if (!HipExecutor::get_num_execs(device_id) && device_reset) { hip::device_guard g(device_id); diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp new file mode 100644 index 00000000000..f933d05e1be --- /dev/null +++ b/include/ginkgo/core/base/device.hpp @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ +#define GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ + + +#include +#include +#include +#include + + +#include + + +namespace gko { + + +class CudaExecutor; + +class HipExecutor; + + +class NvidiaDevice { + friend class CudaExecutor; +#if GINKGO_HIP_PLATFORM_NVCC + friend class HipExecutor; +#endif + +private: + static constexpr int max_devices = 64; + static std::recursive_mutex mutex[max_devices]; + static int num_execs[max_devices]; +}; + + +class AmdDevice { +// to avoid both GINKGO_HIP_PLATFORM_* zero +#if !GINKGO_HIP_PLATFORM_NVCC + friend class HipExecutor; +#endif + +private: + static constexpr int max_devices = 64; + static std::recursive_mutex mutex[max_devices]; + static int num_execs[max_devices]; +}; + + +} // namespace gko + +#endif // GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 0d96cb4cd66..3e5c47d5278 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -1408,6 +1409,8 @@ class CudaExecutor : public detail::ExecutorBase, int get_closest_numa() const { return this->get_exec_info().numa_node; } protected: + using device_type = NvidiaDevice; + void set_gpu_property(); void init_handles(); @@ -1450,20 +1453,23 @@ class CudaExecutor : public detail::ExecutorBase, static void increase_num_execs(unsigned device_id) { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]++; + std::lock_guard guard( + device_type::mutex[device_id]); + device_type::num_execs[device_id]++; } static void decrease_num_execs(unsigned device_id) { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]--; + std::lock_guard guard( + device_type::mutex[device_id]); + device_type::num_execs[device_id]--; } static unsigned get_num_execs(unsigned device_id) { - std::lock_guard guard(mutex[device_id]); - return num_execs[device_id]; + std::lock_guard guard( + device_type::mutex[device_id]); + return device_type::num_execs[device_id]; } void populate_exec_info(const MachineTopology *mach_topo) override; @@ -1476,9 +1482,6 @@ class CudaExecutor : public detail::ExecutorBase, handle_manager cublas_handle_; handle_manager cusparse_handle_; - static constexpr int max_devices = 64; - static unsigned num_execs[max_devices]; - static std::mutex mutex[max_devices]; allocation_mode alloc_mode_; }; @@ -1625,6 +1628,12 @@ class HipExecutor : public detail::ExecutorBase, } protected: +#if (GINKGO_HIP_PLATFORM_NVCC == 1) + using device_type = NvidiaDevice; +#else + using device_type = AmdDevice; +#endif + void set_gpu_property(); void init_handles(); @@ -1667,20 +1676,23 @@ class HipExecutor : public detail::ExecutorBase, static void increase_num_execs(int device_id) { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]++; + std::lock_guard guard( + device_type::mutex[device_id]); + device_type::num_execs[device_id]++; } static void decrease_num_execs(int device_id) { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]--; + std::lock_guard guard( + device_type::mutex[device_id]); + device_type::num_execs[device_id]--; } static int get_num_execs(int device_id) { - std::lock_guard guard(mutex[device_id]); - return num_execs[device_id]; + std::lock_guard guard( + device_type::mutex[device_id]); + return device_type::num_execs[device_id]; } void populate_exec_info(const MachineTopology *mach_topo) override; @@ -1693,9 +1705,6 @@ class HipExecutor : public detail::ExecutorBase, handle_manager hipblas_handle_; handle_manager hipsparse_handle_; - static constexpr int max_devices = 64; - static int num_execs[max_devices]; - static std::mutex mutex[max_devices]; allocation_mode alloc_mode_; }; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index a757a5a6072..a16163edc8a 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index c58709bc904..af680f7a39b 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -1,6 +1,6 @@ find_package(OpenMP 3.0 REQUIRED) -add_library(ginkgo_omp $ "") +add_library(ginkgo_omp $ $ "") target_sources(ginkgo_omp PRIVATE base/version.cpp diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index c008ad1a87c..a66c0323d4f 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(ginkgo_reference $ "") +add_library(ginkgo_reference $ $ "") target_sources(ginkgo_reference PRIVATE base/version.cpp From 4f58ceff77eb192a48cfcae22f857bf67e51e58e Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 1 Jul 2021 14:44:05 +0200 Subject: [PATCH 03/11] put device in ginkgo_device not device_mutex --- core/device_hooks/CMakeLists.txt | 5 ----- cuda/CMakeLists.txt | 2 +- devices/CMakeLists.txt | 3 +-- dpcpp/CMakeLists.txt | 2 +- hip/CMakeLists.txt | 2 +- include/ginkgo/core/base/device.hpp | 10 ++++++++++ omp/CMakeLists.txt | 2 +- reference/CMakeLists.txt | 2 +- 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index a3d00014025..fcb370a81a0 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -1,7 +1,6 @@ if(NOT GINKGO_BUILD_CUDA) add_library(ginkgo_cuda $ - $ cuda_hooks.cpp) target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_cuda) @@ -12,7 +11,6 @@ endif() if (NOT GINKGO_BUILD_DPCPP) add_library(ginkgo_dpcpp $ - $ dpcpp_hooks.cpp) target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_dpcpp) @@ -23,7 +21,6 @@ endif() if(NOT GINKGO_BUILD_HIP) add_library(ginkgo_hip $ - $ hip_hooks.cpp) target_link_libraries(ginkgo_hip PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_hip) @@ -34,7 +31,6 @@ endif() if (NOT GINKGO_BUILD_OMP) add_library(ginkgo_omp $ - $ omp_hooks.cpp) ginkgo_compile_features(ginkgo_omp) target_link_libraries(ginkgo_omp PRIVATE ginkgo_cuda) @@ -48,7 +44,6 @@ endif() if (NOT GINKGO_BUILD_REFERENCE) add_library(ginkgo_reference $ - $ reference_hooks.cpp) target_link_libraries(ginkgo_reference PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_reference) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 0a45231aae1..2e25435e5c5 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -64,7 +64,7 @@ find_library(CUSPARSE cusparse find_library(CURAND curand HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) -add_library(ginkgo_cuda $ $ "") +add_library(ginkgo_cuda $ "") set(GKO_CUDA_COMMON_SOURCES ../common/components/precision_conversion.cpp ../common/matrix/dense_kernels.cpp diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 85632345dc2..09797aafe49 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -16,8 +16,7 @@ function(ginkgo_add_library name) set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) endfunction() -ginkgo_add_library(ginkgo_device machine_topology.cpp) -ginkgo_add_object_library(ginkgo_device_mutex device.cpp) +ginkgo_add_library(ginkgo_device machine_topology.cpp device.cpp) ginkgo_install_library(ginkgo_device) add_subdirectory(cuda) diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 0289fd889e1..e2d476164e8 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -9,7 +9,7 @@ set(GINKGO_DPCPP_VERSION ${GINKGO_DPCPP_VERSION} PARENT_SCOPE) find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}") set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE) -add_library(ginkgo_dpcpp $ $ "") +add_library(ginkgo_dpcpp $ "") target_sources(ginkgo_dpcpp PRIVATE base/version.dp.cpp diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 3d98ec4acd5..61870d03aa7 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -239,7 +239,7 @@ set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS} ${GINKGO_AMD_ARCH_FLAGS}) set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) -hip_add_library(ginkgo_hip $ $ ${GINKGO_HIP_SOURCES} +hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index f933d05e1be..01f0bd7107d 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -51,8 +51,13 @@ class CudaExecutor; class HipExecutor; +/** + * NvidiaDevice handles the number of executor on Nvidia devices and have the + * corresponding recursive_mutex. + */ class NvidiaDevice { friend class CudaExecutor; +// If Hip is compiled for NVCC, give NvidiaDevice's permission to HipExecutor #if GINKGO_HIP_PLATFORM_NVCC friend class HipExecutor; #endif @@ -64,8 +69,13 @@ class NvidiaDevice { }; +/** + * AmdDevice handles the number of executor on Amd devices and have the + * corresponding recursive_mutex. + */ class AmdDevice { // to avoid both GINKGO_HIP_PLATFORM_* zero +// If Hip isn't compiled for NVCC, give AmdDevice's permission to HipExecutor #if !GINKGO_HIP_PLATFORM_NVCC friend class HipExecutor; #endif diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index af680f7a39b..c58709bc904 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -1,6 +1,6 @@ find_package(OpenMP 3.0 REQUIRED) -add_library(ginkgo_omp $ $ "") +add_library(ginkgo_omp $ "") target_sources(ginkgo_omp PRIVATE base/version.cpp diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index a66c0323d4f..c008ad1a87c 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(ginkgo_reference $ $ "") +add_library(ginkgo_reference $ "") target_sources(ginkgo_reference PRIVATE base/version.cpp From 0596b92265c690246c407ae3d9869feb02d497aa Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 1 Jul 2021 18:06:11 +0200 Subject: [PATCH 04/11] use static getter to avoid the static issue in MSVC --- cuda/base/executor.cpp | 2 +- devices/CMakeLists.txt | 2 +- devices/device.cpp | 54 -------------------------- hip/base/executor.hip.cpp | 2 +- include/ginkgo/core/base/device.hpp | 56 +++++++++++++++++++++++++-- include/ginkgo/core/base/executor.hpp | 24 ++++++------ 6 files changed, 67 insertions(+), 73 deletions(-) delete mode 100644 devices/device.cpp diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 38bffb3aa10..47a712c3eba 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -66,7 +66,7 @@ std::shared_ptr CudaExecutor::create( [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_type::mutex[device_id]); + device_type::get_mutex(device_id)); delete exec; if (!CudaExecutor::get_num_execs(device_id) && device_reset) { cuda::device_guard g(device_id); diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 09797aafe49..0c0e80febaf 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -16,7 +16,7 @@ function(ginkgo_add_library name) set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) endfunction() -ginkgo_add_library(ginkgo_device machine_topology.cpp device.cpp) +ginkgo_add_library(ginkgo_device machine_topology.cpp) ginkgo_install_library(ginkgo_device) add_subdirectory(cuda) diff --git a/devices/device.cpp b/devices/device.cpp deleted file mode 100644 index b178de0777f..00000000000 --- a/devices/device.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include - - -#include - - -namespace gko { - - -std::recursive_mutex NvidiaDevice::mutex[max_devices]; - -int NvidiaDevice::num_execs[max_devices]; - - -std::recursive_mutex AmdDevice::mutex[max_devices]; - -int AmdDevice::num_execs[max_devices]; - - -} // namespace gko diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index cdb22ce7ee2..36df8fa7152 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -65,7 +65,7 @@ std::shared_ptr HipExecutor::create( [device_id](HipExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_type::mutex[device_id]); + device_type::get_mutex(device_id)); delete exec; if (!HipExecutor::get_num_execs(device_id) && device_reset) { hip::device_guard g(device_id); diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index 01f0bd7107d..f60fecae736 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -63,9 +63,33 @@ class NvidiaDevice { #endif private: + /** + * get_mutex gets the static mutex reference at i. + * + * @param i index of mutex + * + * @return recursive_mutex reference + */ + static std::recursive_mutex &get_mutex(int i) + { + static std::recursive_mutex mutex[max_devices]; + return mutex[i]; + } + + /** + * get_num_execs gets the static num_execs reference at i. + * + * @param i index of num_execs + * + * @return int reference + */ + static int &get_num_execs(int i) + { + static int num_execs[max_devices]; + return num_execs[i]; + } + static constexpr int max_devices = 64; - static std::recursive_mutex mutex[max_devices]; - static int num_execs[max_devices]; }; @@ -81,9 +105,33 @@ class AmdDevice { #endif private: + /** + * get_mutex gets the static mutex reference at i. + * + * @param i index of mutex + * + * @return recursive_mutex reference + */ + static std::recursive_mutex &get_mutex(int i) + { + static std::recursive_mutex mutex[max_devices]; + return mutex[i]; + } + + /** + * get_num_execs gets the static num_execs reference at i. + * + * @param i index of num_execs + * + * @return int reference + */ + static int &get_num_execs(int i) + { + static int num_execs[max_devices]; + return num_execs[i]; + } + static constexpr int max_devices = 64; - static std::recursive_mutex mutex[max_devices]; - static int num_execs[max_devices]; }; diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 3e5c47d5278..9e7f12ab74f 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1454,22 +1454,22 @@ class CudaExecutor : public detail::ExecutorBase, static void increase_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - device_type::num_execs[device_id]++; + device_type::get_mutex(device_id)); + device_type::get_num_execs(device_id)++; } static void decrease_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - device_type::num_execs[device_id]--; + device_type::get_mutex(device_id)); + device_type::get_num_execs(device_id)--; } static unsigned get_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - return device_type::num_execs[device_id]; + device_type::get_mutex(device_id)); + return device_type::get_num_execs(device_id); } void populate_exec_info(const MachineTopology *mach_topo) override; @@ -1677,22 +1677,22 @@ class HipExecutor : public detail::ExecutorBase, static void increase_num_execs(int device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - device_type::num_execs[device_id]++; + device_type::get_mutex(device_id)); + device_type::get_num_execs(device_id)++; } static void decrease_num_execs(int device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - device_type::num_execs[device_id]--; + device_type::get_mutex(device_id)); + device_type::get_num_execs(device_id)--; } static int get_num_execs(int device_id) { std::lock_guard guard( - device_type::mutex[device_id]); - return device_type::num_execs[device_id]; + device_type::get_mutex(device_id)); + return device_type::get_num_execs(device_id); } void populate_exec_info(const MachineTopology *mach_topo) override; From 9a2c6d1010dc9b398d2e907790179e893a7790b7 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 8 Jul 2021 17:01:45 +0200 Subject: [PATCH 05/11] add cudacuda and hiphip test Co-authored-by: Pratik Nayak --- cuda/test/base/cuda_executor_reset.cu | 33 ++++++++++++++++++++++++ hip/test/base/hip_executor_reset.hip.cpp | 33 ++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/cuda/test/base/cuda_executor_reset.cu b/cuda/test/base/cuda_executor_reset.cu index b63318b9afc..4807354bdea 100644 --- a/cuda/test/base/cuda_executor_reset.cu +++ b/cuda/test/base/cuda_executor_reset.cu @@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include @@ -62,4 +65,34 @@ TEST(DeviceReset, CudaHip) } +template +void func() +{ + auto ref = gko::ReferenceExecutor::create(); + auto exec = Executor::create(0, ref, true); +} + + +TEST(DeviceReset, CudaCuda) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + +TEST(DeviceReset, HipHip) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + } // namespace diff --git a/hip/test/base/hip_executor_reset.hip.cpp b/hip/test/base/hip_executor_reset.hip.cpp index b63318b9afc..4807354bdea 100644 --- a/hip/test/base/hip_executor_reset.hip.cpp +++ b/hip/test/base/hip_executor_reset.hip.cpp @@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include @@ -62,4 +65,34 @@ TEST(DeviceReset, CudaHip) } +template +void func() +{ + auto ref = gko::ReferenceExecutor::create(); + auto exec = Executor::create(0, ref, true); +} + + +TEST(DeviceReset, CudaCuda) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + +TEST(DeviceReset, HipHip) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + } // namespace From e08731b1f50412027acfd1e61bcb215ba9a8485c Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 9 Jul 2021 15:07:12 +0200 Subject: [PATCH 06/11] ensure ops affected by Reset after increase_exec --- cuda/test/base/CMakeLists.txt | 2 +- ...cutor_reset.cu => cuda_executor_reset.cpp} | 19 ++++--------------- hip/test/base/CMakeLists.txt | 2 +- ...r_reset.hip.cpp => hip_executor_reset.cpp} | 19 ++++--------------- include/ginkgo/core/base/executor.hpp | 12 ++++++++++-- 5 files changed, 20 insertions(+), 34 deletions(-) rename cuda/test/base/{cuda_executor_reset.cu => cuda_executor_reset.cpp} (87%) rename hip/test/base/{hip_executor_reset.hip.cpp => hip_executor_reset.cpp} (87%) diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index bc9124a30bf..8bd6194f134 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,6 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) -ginkgo_create_cuda_test(cuda_executor_reset) +ginkgo_create_test(cuda_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology NUMA::NUMA) diff --git a/cuda/test/base/cuda_executor_reset.cu b/cuda/test/base/cuda_executor_reset.cpp similarity index 87% rename from cuda/test/base/cuda_executor_reset.cu rename to cuda/test/base/cuda_executor_reset.cpp index 4807354bdea..5d9169be808 100644 --- a/cuda/test/base/cuda_executor_reset.cu +++ b/cuda/test/base/cuda_executor_reset.cpp @@ -45,6 +45,7 @@ namespace { #define GTEST_ASSERT_NO_EXIT(statement) \ ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + TEST(DeviceReset, HipCuda) { GTEST_ASSERT_NO_EXIT({ @@ -65,30 +66,18 @@ TEST(DeviceReset, CudaHip) } -template void func() { auto ref = gko::ReferenceExecutor::create(); - auto exec = Executor::create(0, ref, true); + auto exec = gko::CudaExecutor::create(0, ref, true); } TEST(DeviceReset, CudaCuda) { GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); -} - - -TEST(DeviceReset, HipHip) -{ - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); + std::thread t1(func); + std::thread t2(func); t1.join(); t2.join(); }); diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 14640d2fb2d..dd91d9e3c5f 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_create_hip_test(hip_executor) -ginkgo_create_hip_test(hip_executor_reset) +ginkgo_create_test(hip_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology NUMA::NUMA) diff --git a/hip/test/base/hip_executor_reset.hip.cpp b/hip/test/base/hip_executor_reset.cpp similarity index 87% rename from hip/test/base/hip_executor_reset.hip.cpp rename to hip/test/base/hip_executor_reset.cpp index 4807354bdea..33a9c8040c0 100644 --- a/hip/test/base/hip_executor_reset.hip.cpp +++ b/hip/test/base/hip_executor_reset.cpp @@ -45,6 +45,7 @@ namespace { #define GTEST_ASSERT_NO_EXIT(statement) \ ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + TEST(DeviceReset, HipCuda) { GTEST_ASSERT_NO_EXIT({ @@ -65,30 +66,18 @@ TEST(DeviceReset, CudaHip) } -template void func() { auto ref = gko::ReferenceExecutor::create(); - auto exec = Executor::create(0, ref, true); -} - - -TEST(DeviceReset, CudaCuda) -{ - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); + auto exec = gko::HipExecutor::create(0, ref, true); } TEST(DeviceReset, HipHip) { GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); + std::thread t1(func); + std::thread t2(func); t1.join(); t2.join(); }); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 9e7f12ab74f..fb484871f9b 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1430,9 +1430,13 @@ class CudaExecutor : public detail::ExecutorBase, MachineTopology::get_instance()->bind_to_pus( this->get_closest_pus()); } + // it only gets attribute from device, so it should not be affected by + // DeviceReset. this->set_gpu_property(); - this->init_handles(); + // increase the number of executor before any operations may be affected + // by DeviceReset. increase_num_execs(this->get_exec_info().device_id); + this->init_handles(); } void *raw_alloc(size_type size) const override; @@ -1653,9 +1657,13 @@ class HipExecutor : public detail::ExecutorBase, MachineTopology::get_instance()->bind_to_pus( this->get_closest_pus()); } + // it only gets attribute from device, so it should not be affected by + // DeviceReset. this->set_gpu_property(); - this->init_handles(); + // increase the number of executor before any operations may be affected + // by DeviceReset. increase_num_execs(this->get_exec_info().device_id); + this->init_handles(); } void *raw_alloc(size_type size) const override; From 23d3197f2dee0f671df564ace05acccef49057e3 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 29 Jul 2021 08:56:36 +0200 Subject: [PATCH 07/11] use device_class and move definition to cpp link thread in test Co-authored-by: Terry Cojean --- cuda/base/executor.cpp | 2 +- cuda/test/base/CMakeLists.txt | 2 +- devices/CMakeLists.txt | 2 +- devices/device.cpp | 68 +++++++++++++++++++++++++++ hip/base/executor.hip.cpp | 2 +- hip/test/base/CMakeLists.txt | 2 +- include/ginkgo/core/base/device.hpp | 24 ++-------- include/ginkgo/core/base/executor.hpp | 30 ++++++------ 8 files changed, 92 insertions(+), 40 deletions(-) create mode 100644 devices/device.cpp diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 47a712c3eba..4091a251aeb 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -66,7 +66,7 @@ std::shared_ptr CudaExecutor::create( [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_type::get_mutex(device_id)); + device_class::get_mutex(device_id)); delete exec; if (!CudaExecutor::get_num_execs(device_id) && device_reset) { cuda::device_guard g(device_id); diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index 8bd6194f134..c23efefa8ed 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,6 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) -ginkgo_create_test(cuda_executor_reset) +ginkgo_create_thread_test(cuda_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology NUMA::NUMA) diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 0c0e80febaf..09797aafe49 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -16,7 +16,7 @@ function(ginkgo_add_library name) set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) endfunction() -ginkgo_add_library(ginkgo_device machine_topology.cpp) +ginkgo_add_library(ginkgo_device machine_topology.cpp device.cpp) ginkgo_install_library(ginkgo_device) add_subdirectory(cuda) diff --git a/devices/device.cpp b/devices/device.cpp new file mode 100644 index 00000000000..79f7da20077 --- /dev/null +++ b/devices/device.cpp @@ -0,0 +1,68 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include + + +#include + + +namespace gko { + + +std::recursive_mutex &NvidiaDevice::get_mutex(int i) +{ + static std::recursive_mutex mutex[max_devices]; + return mutex[i]; +} + +int &NvidiaDevice::get_num_execs(int i) +{ + static int num_execs[max_devices]; + return num_execs[i]; +} + +std::recursive_mutex &AmdDevice::get_mutex(int i) +{ + static std::recursive_mutex mutex[max_devices]; + return mutex[i]; +} + +int &AmdDevice::get_num_execs(int i) +{ + static int num_execs[max_devices]; + return num_execs[i]; +} + + +} // namespace gko diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 36df8fa7152..d0d64110289 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -65,7 +65,7 @@ std::shared_ptr HipExecutor::create( [device_id](HipExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_type::get_mutex(device_id)); + device_class::get_mutex(device_id)); delete exec; if (!HipExecutor::get_num_execs(device_id) && device_reset) { hip::device_guard g(device_id); diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index dd91d9e3c5f..4529a40c862 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_create_hip_test(hip_executor) -ginkgo_create_test(hip_executor_reset) +ginkgo_create_thread_test(hip_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology NUMA::NUMA) diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index f60fecae736..025927ee367 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -70,11 +70,7 @@ class NvidiaDevice { * * @return recursive_mutex reference */ - static std::recursive_mutex &get_mutex(int i) - { - static std::recursive_mutex mutex[max_devices]; - return mutex[i]; - } + static std::recursive_mutex &get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. @@ -83,11 +79,7 @@ class NvidiaDevice { * * @return int reference */ - static int &get_num_execs(int i) - { - static int num_execs[max_devices]; - return num_execs[i]; - } + static int &get_num_execs(int i); static constexpr int max_devices = 64; }; @@ -112,11 +104,7 @@ class AmdDevice { * * @return recursive_mutex reference */ - static std::recursive_mutex &get_mutex(int i) - { - static std::recursive_mutex mutex[max_devices]; - return mutex[i]; - } + static std::recursive_mutex &get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. @@ -125,11 +113,7 @@ class AmdDevice { * * @return int reference */ - static int &get_num_execs(int i) - { - static int num_execs[max_devices]; - return num_execs[i]; - } + static int &get_num_execs(int i); static constexpr int max_devices = 64; }; diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index fb484871f9b..8c172bc4ed9 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1409,7 +1409,7 @@ class CudaExecutor : public detail::ExecutorBase, int get_closest_numa() const { return this->get_exec_info().numa_node; } protected: - using device_type = NvidiaDevice; + using device_class = NvidiaDevice; void set_gpu_property(); @@ -1458,22 +1458,22 @@ class CudaExecutor : public detail::ExecutorBase, static void increase_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - device_type::get_num_execs(device_id)++; + device_class::get_mutex(device_id)); + device_class::get_num_execs(device_id)++; } static void decrease_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - device_type::get_num_execs(device_id)--; + device_class::get_mutex(device_id)); + device_class::get_num_execs(device_id)--; } static unsigned get_num_execs(unsigned device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - return device_type::get_num_execs(device_id); + device_class::get_mutex(device_id)); + return device_class::get_num_execs(device_id); } void populate_exec_info(const MachineTopology *mach_topo) override; @@ -1633,9 +1633,9 @@ class HipExecutor : public detail::ExecutorBase, protected: #if (GINKGO_HIP_PLATFORM_NVCC == 1) - using device_type = NvidiaDevice; + using device_class = NvidiaDevice; #else - using device_type = AmdDevice; + using device_class = AmdDevice; #endif void set_gpu_property(); @@ -1685,22 +1685,22 @@ class HipExecutor : public detail::ExecutorBase, static void increase_num_execs(int device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - device_type::get_num_execs(device_id)++; + device_class::get_mutex(device_id)); + device_class::get_num_execs(device_id)++; } static void decrease_num_execs(int device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - device_type::get_num_execs(device_id)--; + device_class::get_mutex(device_id)); + device_class::get_num_execs(device_id)--; } static int get_num_execs(int device_id) { std::lock_guard guard( - device_type::get_mutex(device_id)); - return device_type::get_num_execs(device_id); + device_class::get_mutex(device_id)); + return device_class::get_num_execs(device_id); } void populate_exec_info(const MachineTopology *mach_topo) override; From 2da69cd8592e9437e66cbb73c8f5d27c1bef64c6 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 29 Jul 2021 17:23:30 +0200 Subject: [PATCH 08/11] handle deleter in create not destructor of class it makes us keep mutex not recursive_mutex Co-authored-by: Terry Cojean --- cuda/base/executor.cpp | 6 ++++-- devices/device.cpp | 8 ++++---- hip/base/executor.hip.cpp | 6 ++++-- include/ginkgo/core/base/device.hpp | 4 ++-- include/ginkgo/core/base/executor.hpp | 22 ++++++---------------- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 4091a251aeb..2898ff3126c 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -65,10 +65,12 @@ std::shared_ptr CudaExecutor::create( alloc_mode), [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); - std::lock_guard guard( + std::lock_guard guard( device_class::get_mutex(device_id)); delete exec; - if (!CudaExecutor::get_num_execs(device_id) && device_reset) { + auto &num_execs = device_class::get_num_execs(device_id); + num_execs--; + if (!num_execs && device_reset) { cuda::device_guard g(device_id); cudaDeviceReset(); } diff --git a/devices/device.cpp b/devices/device.cpp index 79f7da20077..c70fdaaffb8 100644 --- a/devices/device.cpp +++ b/devices/device.cpp @@ -40,9 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -std::recursive_mutex &NvidiaDevice::get_mutex(int i) +std::mutex &NvidiaDevice::get_mutex(int i) { - static std::recursive_mutex mutex[max_devices]; + static std::mutex mutex[max_devices]; return mutex[i]; } @@ -52,9 +52,9 @@ int &NvidiaDevice::get_num_execs(int i) return num_execs[i]; } -std::recursive_mutex &AmdDevice::get_mutex(int i) +std::mutex &AmdDevice::get_mutex(int i) { - static std::recursive_mutex mutex[max_devices]; + static std::mutex mutex[max_devices]; return mutex[i]; } diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index d0d64110289..c5b1c32920b 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -64,10 +64,12 @@ std::shared_ptr HipExecutor::create( new HipExecutor(device_id, std::move(master), device_reset, alloc_mode), [device_id](HipExecutor *exec) { auto device_reset = exec->get_device_reset(); - std::lock_guard guard( + std::lock_guard guard( device_class::get_mutex(device_id)); delete exec; - if (!HipExecutor::get_num_execs(device_id) && device_reset) { + auto &num_execs = device_class::get_num_execs(device_id); + num_execs--; + if (!num_execs && device_reset) { hip::device_guard g(device_id); hipDeviceReset(); } diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index 025927ee367..d40ad7f6eef 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -70,7 +70,7 @@ class NvidiaDevice { * * @return recursive_mutex reference */ - static std::recursive_mutex &get_mutex(int i); + static std::mutex &get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. @@ -104,7 +104,7 @@ class AmdDevice { * * @return recursive_mutex reference */ - static std::recursive_mutex &get_mutex(int i); + static std::mutex &get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 8c172bc4ed9..1b9da581f30 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1302,8 +1302,6 @@ class CudaExecutor : public detail::ExecutorBase, bool device_reset = false, allocation_mode alloc_mode = default_cuda_alloc_mode); - ~CudaExecutor() { decrease_num_execs(this->get_device_id()); } - std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; @@ -1457,22 +1455,19 @@ class CudaExecutor : public detail::ExecutorBase, static void increase_num_execs(unsigned device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)++; } static void decrease_num_execs(unsigned device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)--; } static unsigned get_num_execs(unsigned device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); return device_class::get_num_execs(device_id); } @@ -1525,8 +1520,6 @@ class HipExecutor : public detail::ExecutorBase, bool device_reset = false, allocation_mode alloc_mode = default_hip_alloc_mode); - ~HipExecutor() { decrease_num_execs(this->get_device_id()); } - std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; @@ -1684,22 +1677,19 @@ class HipExecutor : public detail::ExecutorBase, static void increase_num_execs(int device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)++; } static void decrease_num_execs(int device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)--; } static int get_num_execs(int device_id) { - std::lock_guard guard( - device_class::get_mutex(device_id)); + std::lock_guard guard(device_class::get_mutex(device_id)); return device_class::get_num_execs(device_id); } From eb37c0ff5cb9692d5f6f0a811d19da9d99dee2e4 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 30 Jul 2021 11:43:01 +0200 Subject: [PATCH 09/11] increase/decrease num_exec when enabling module --- devices/device.cpp | 3 +++ include/ginkgo/config.hpp.in | 8 ++++++++ include/ginkgo/core/base/executor.hpp | 12 ++++++++++++ 3 files changed, 23 insertions(+) diff --git a/devices/device.cpp b/devices/device.cpp index c70fdaaffb8..3080044f675 100644 --- a/devices/device.cpp +++ b/devices/device.cpp @@ -46,18 +46,21 @@ std::mutex &NvidiaDevice::get_mutex(int i) return mutex[i]; } + int &NvidiaDevice::get_num_execs(int i) { static int num_execs[max_devices]; return num_execs[i]; } + std::mutex &AmdDevice::get_mutex(int i) { static std::mutex mutex[max_devices]; return mutex[i]; } + int &AmdDevice::get_num_execs(int i) { static int num_execs[max_devices]; diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 1c6a31ea481..c232f481fac 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -67,6 +67,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine GINKGO_MIXED_PRECISION +/* Should we compile cuda kenrels for Ginkgo? */ +#cmakedefine GINKGO_BUILD_CUDA + + +/* Should we compile hip kenrels for Ginkgo? */ +#cmakedefine GINKGO_BUILD_HIP + + /* What is HIP compiled for, hcc or nvcc? */ // clang-format off #define GINKGO_HIP_PLATFORM_HCC @GINKGO_HIP_PLATFORM_HCC@ diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 1b9da581f30..45d42f0b0f1 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1455,14 +1455,20 @@ class CudaExecutor : public detail::ExecutorBase, static void increase_num_execs(unsigned device_id) { +#ifdef GINKGO_BUILD_CUDA + // increase the Cuda Device count only when ginkgo build cuda std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)++; +#endif // GINKGO_BUILD_CUDA } static void decrease_num_execs(unsigned device_id) { +#ifdef GINKGO_BUILD_CUDA + // increase the Cuda Device count only when ginkgo build cuda std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)--; +#endif // GINKGO_BUILD_CUDA } static unsigned get_num_execs(unsigned device_id) @@ -1677,14 +1683,20 @@ class HipExecutor : public detail::ExecutorBase, static void increase_num_execs(int device_id) { +#ifdef GINKGO_BUILD_HIP + // increase the HIP Device count only when ginkgo build hip std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)++; +#endif // GINKGO_BUILD_HIP } static void decrease_num_execs(int device_id) { +#ifdef GINKGO_BUILD_HIP + // increase the HIP Device count only when ginkgo build hip std::lock_guard guard(device_class::get_mutex(device_id)); device_class::get_num_execs(device_id)--; +#endif // GINKGO_BUILD_HIP } static int get_num_execs(int device_id) From 78d0c00c753f77a79be27b7631e05bc9e1c5e458 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Sun, 1 Aug 2021 18:10:30 +0200 Subject: [PATCH 10/11] move all related to cmake into cpp not hpp Co-authored-by: Tobias Ribizel --- cuda/base/executor.cpp | 4 +- devices/cuda/executor.cpp | 27 +++++++++++++ devices/hip/executor.cpp | 34 ++++++++++++++++ hip/base/executor.hip.cpp | 11 +++++- include/ginkgo/config.hpp.in | 4 +- include/ginkgo/core/base/device.hpp | 7 ---- include/ginkgo/core/base/executor.hpp | 56 +++------------------------ 7 files changed, 80 insertions(+), 63 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 2898ff3126c..1f179dc1224 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -66,9 +66,9 @@ std::shared_ptr CudaExecutor::create( [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_class::get_mutex(device_id)); + NvidiaDevice::get_mutex(device_id)); delete exec; - auto &num_execs = device_class::get_num_execs(device_id); + auto &num_execs = NvidiaDevice::get_num_execs(device_id); num_execs--; if (!num_execs && device_reset) { cuda::device_guard g(device_id); diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index abdc93a4a08..708fe02dcd6 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -64,4 +64,31 @@ bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const } +void CudaExecutor::increase_num_execs(unsigned device_id) +{ +#ifdef GINKGO_BUILD_CUDA + // increase the Cuda Device count only when ginkgo build cuda + std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); + NvidiaDevice::get_num_execs(device_id)++; +#endif // GINKGO_BUILD_CUDA +} + + +void CudaExecutor::decrease_num_execs(unsigned device_id) +{ +#ifdef GINKGO_BUILD_CUDA + // increase the Cuda Device count only when ginkgo build cuda + std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); + NvidiaDevice::get_num_execs(device_id)--; +#endif // GINKGO_BUILD_CUDA +} + + +unsigned CudaExecutor::get_num_execs(unsigned device_id) +{ + std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); + return NvidiaDevice::get_num_execs(device_id); +} + + } // namespace gko diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index 0e755209f91..daf8c14ec58 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -61,4 +61,38 @@ bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const } +#if (GINKGO_HIP_PLATFORM_NVCC == 1) +using hip_device_class = NvidiaDevice; +#else +using hip_device_class = AmdDevice; +#endif + + +void HipExecutor::increase_num_execs(int device_id) +{ +#ifdef GINKGO_BUILD_HIP + // increase the HIP Device count only when ginkgo build hip + std::lock_guard guard(hip_device_class::get_mutex(device_id)); + hip_device_class::get_num_execs(device_id)++; +#endif // GINKGO_BUILD_HIP +} + + +void HipExecutor::decrease_num_execs(int device_id) +{ +#ifdef GINKGO_BUILD_HIP + // increase the HIP Device count only when ginkgo build hip + std::lock_guard guard(hip_device_class::get_mutex(device_id)); + hip_device_class::get_num_execs(device_id)--; +#endif // GINKGO_BUILD_HIP +} + + +int HipExecutor::get_num_execs(int device_id) +{ + std::lock_guard guard(hip_device_class::get_mutex(device_id)); + return hip_device_class::get_num_execs(device_id); +} + + } // namespace gko diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index c5b1c32920b..11ea4ab8ead 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -56,6 +56,13 @@ namespace gko { #include "common/base/executor.hpp.inc" +#if (GINKGO_HIP_PLATFORM_NVCC == 1) +using hip_device_class = NvidiaDevice; +#else +using hip_device_class = AmdDevice; +#endif + + std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode) @@ -65,9 +72,9 @@ std::shared_ptr HipExecutor::create( [device_id](HipExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - device_class::get_mutex(device_id)); + hip_device_class::get_mutex(device_id)); delete exec; - auto &num_execs = device_class::get_num_execs(device_id); + auto &num_execs = hip_device_class::get_num_execs(device_id); num_execs--; if (!num_execs && device_reset) { hip::device_guard g(device_id); diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index c232f481fac..42f2c56f44d 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -67,11 +67,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine GINKGO_MIXED_PRECISION -/* Should we compile cuda kenrels for Ginkgo? */ +/* Should we compile cuda kernels for Ginkgo? */ #cmakedefine GINKGO_BUILD_CUDA -/* Should we compile hip kenrels for Ginkgo? */ +/* Should we compile hip kernels for Ginkgo? */ #cmakedefine GINKGO_BUILD_HIP diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index d40ad7f6eef..981191ba73c 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -57,10 +57,7 @@ class HipExecutor; */ class NvidiaDevice { friend class CudaExecutor; -// If Hip is compiled for NVCC, give NvidiaDevice's permission to HipExecutor -#if GINKGO_HIP_PLATFORM_NVCC friend class HipExecutor; -#endif private: /** @@ -90,11 +87,7 @@ class NvidiaDevice { * corresponding recursive_mutex. */ class AmdDevice { -// to avoid both GINKGO_HIP_PLATFORM_* zero -// If Hip isn't compiled for NVCC, give AmdDevice's permission to HipExecutor -#if !GINKGO_HIP_PLATFORM_NVCC friend class HipExecutor; -#endif private: /** diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 45d42f0b0f1..59876d620cc 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1407,8 +1407,6 @@ class CudaExecutor : public detail::ExecutorBase, int get_closest_numa() const { return this->get_exec_info().numa_node; } protected: - using device_class = NvidiaDevice; - void set_gpu_property(); void init_handles(); @@ -1453,29 +1451,11 @@ class CudaExecutor : public detail::ExecutorBase, bool verify_memory_to(const CudaExecutor *dest_exec) const override; - static void increase_num_execs(unsigned device_id) - { -#ifdef GINKGO_BUILD_CUDA - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(device_class::get_mutex(device_id)); - device_class::get_num_execs(device_id)++; -#endif // GINKGO_BUILD_CUDA - } + static void increase_num_execs(unsigned device_id); - static void decrease_num_execs(unsigned device_id) - { -#ifdef GINKGO_BUILD_CUDA - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(device_class::get_mutex(device_id)); - device_class::get_num_execs(device_id)--; -#endif // GINKGO_BUILD_CUDA - } + static void decrease_num_execs(unsigned device_id); - static unsigned get_num_execs(unsigned device_id) - { - std::lock_guard guard(device_class::get_mutex(device_id)); - return device_class::get_num_execs(device_id); - } + static unsigned get_num_execs(unsigned device_id); void populate_exec_info(const MachineTopology *mach_topo) override; @@ -1631,12 +1611,6 @@ class HipExecutor : public detail::ExecutorBase, } protected: -#if (GINKGO_HIP_PLATFORM_NVCC == 1) - using device_class = NvidiaDevice; -#else - using device_class = AmdDevice; -#endif - void set_gpu_property(); void init_handles(); @@ -1681,29 +1655,11 @@ class HipExecutor : public detail::ExecutorBase, bool verify_memory_to(const HipExecutor *dest_exec) const override; - static void increase_num_execs(int device_id) - { -#ifdef GINKGO_BUILD_HIP - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(device_class::get_mutex(device_id)); - device_class::get_num_execs(device_id)++; -#endif // GINKGO_BUILD_HIP - } + static void increase_num_execs(int device_id); - static void decrease_num_execs(int device_id) - { -#ifdef GINKGO_BUILD_HIP - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(device_class::get_mutex(device_id)); - device_class::get_num_execs(device_id)--; -#endif // GINKGO_BUILD_HIP - } + static void decrease_num_execs(int device_id); - static int get_num_execs(int device_id) - { - std::lock_guard guard(device_class::get_mutex(device_id)); - return device_class::get_num_execs(device_id); - } + static int get_num_execs(int device_id); void populate_exec_info(const MachineTopology *mach_topo) override; From c22fbb3597cfbd008a0072374fd19e48b73668cd Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Mon, 2 Aug 2021 14:19:19 +0200 Subject: [PATCH 11/11] use snake_case and move the compile_definitions Co-authored-by: Terry Cojean Co-authored-by: Tobias Ribizel --- cuda/base/executor.cpp | 4 ++-- devices/cuda/CMakeLists.txt | 3 +++ devices/cuda/executor.cpp | 20 ++++++++++---------- devices/device.cpp | 8 ++++---- devices/hip/CMakeLists.txt | 3 +++ devices/hip/executor.cpp | 12 ++++++------ hip/base/executor.hip.cpp | 4 ++-- include/ginkgo/config.hpp.in | 8 -------- include/ginkgo/core/base/device.hpp | 8 ++++---- 9 files changed, 34 insertions(+), 36 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 1f179dc1224..1c35af4d390 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -66,9 +66,9 @@ std::shared_ptr CudaExecutor::create( [device_id](CudaExecutor *exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( - NvidiaDevice::get_mutex(device_id)); + nvidia_device::get_mutex(device_id)); delete exec; - auto &num_execs = NvidiaDevice::get_num_execs(device_id); + auto &num_execs = nvidia_device::get_num_execs(device_id); num_execs--; if (!num_execs && device_reset) { cuda::device_guard g(device_id); diff --git a/devices/cuda/CMakeLists.txt b/devices/cuda/CMakeLists.txt index acd0455ff2a..b9d1bca187b 100644 --- a/devices/cuda/CMakeLists.txt +++ b/devices/cuda/CMakeLists.txt @@ -1,2 +1,5 @@ ginkgo_add_object_library(ginkgo_cuda_device executor.cpp) +if(GINKGO_BUILD_CUDA) + target_compile_definitions(ginkgo_cuda_device PRIVATE GKO_COMPILING_CUDA_DEVICE) +endif() diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index 708fe02dcd6..06e8d504097 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -66,28 +66,28 @@ bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const void CudaExecutor::increase_num_execs(unsigned device_id) { -#ifdef GINKGO_BUILD_CUDA +#ifdef GKO_COMPILING_CUDA_DEVICE // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); - NvidiaDevice::get_num_execs(device_id)++; -#endif // GINKGO_BUILD_CUDA + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + nvidia_device::get_num_execs(device_id)++; +#endif // GKO_COMPILING_CUDA_DEVICE } void CudaExecutor::decrease_num_execs(unsigned device_id) { -#ifdef GINKGO_BUILD_CUDA +#ifdef GKO_COMPILING_CUDA_DEVICE // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); - NvidiaDevice::get_num_execs(device_id)--; -#endif // GINKGO_BUILD_CUDA + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + nvidia_device::get_num_execs(device_id)--; +#endif // GKO_COMPILING_CUDA_DEVICE } unsigned CudaExecutor::get_num_execs(unsigned device_id) { - std::lock_guard guard(NvidiaDevice::get_mutex(device_id)); - return NvidiaDevice::get_num_execs(device_id); + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + return nvidia_device::get_num_execs(device_id); } diff --git a/devices/device.cpp b/devices/device.cpp index 3080044f675..ca6597d652d 100644 --- a/devices/device.cpp +++ b/devices/device.cpp @@ -40,28 +40,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -std::mutex &NvidiaDevice::get_mutex(int i) +std::mutex &nvidia_device::get_mutex(int i) { static std::mutex mutex[max_devices]; return mutex[i]; } -int &NvidiaDevice::get_num_execs(int i) +int &nvidia_device::get_num_execs(int i) { static int num_execs[max_devices]; return num_execs[i]; } -std::mutex &AmdDevice::get_mutex(int i) +std::mutex &amd_device::get_mutex(int i) { static std::mutex mutex[max_devices]; return mutex[i]; } -int &AmdDevice::get_num_execs(int i) +int &amd_device::get_num_execs(int i) { static int num_execs[max_devices]; return num_execs[i]; diff --git a/devices/hip/CMakeLists.txt b/devices/hip/CMakeLists.txt index 9995e51c5e7..d05db433984 100644 --- a/devices/hip/CMakeLists.txt +++ b/devices/hip/CMakeLists.txt @@ -1,2 +1,5 @@ ginkgo_add_object_library(ginkgo_hip_device executor.cpp) +if(GINKGO_BUILD_HIP) + target_compile_definitions(ginkgo_hip_device PRIVATE GKO_COMPILING_HIP_DEVICE) +endif() diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index daf8c14ec58..53da0db72c2 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -62,29 +62,29 @@ bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const #if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = NvidiaDevice; +using hip_device_class = nvidia_device; #else -using hip_device_class = AmdDevice; +using hip_device_class = amd_device; #endif void HipExecutor::increase_num_execs(int device_id) { -#ifdef GINKGO_BUILD_HIP +#ifdef GKO_COMPILING_HIP_DEVICE // increase the HIP Device count only when ginkgo build hip std::lock_guard guard(hip_device_class::get_mutex(device_id)); hip_device_class::get_num_execs(device_id)++; -#endif // GINKGO_BUILD_HIP +#endif // GKO_COMPILING_HIP_DEVICE } void HipExecutor::decrease_num_execs(int device_id) { -#ifdef GINKGO_BUILD_HIP +#ifdef GKO_COMPILING_HIP_DEVICE // increase the HIP Device count only when ginkgo build hip std::lock_guard guard(hip_device_class::get_mutex(device_id)); hip_device_class::get_num_execs(device_id)--; -#endif // GINKGO_BUILD_HIP +#endif // GKO_COMPILING_HIP_DEVICE } diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 11ea4ab8ead..3875ad95910 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -57,9 +57,9 @@ namespace gko { #if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = NvidiaDevice; +using hip_device_class = nvidia_device; #else -using hip_device_class = AmdDevice; +using hip_device_class = amd_device; #endif diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 42f2c56f44d..1c6a31ea481 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -67,14 +67,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine GINKGO_MIXED_PRECISION -/* Should we compile cuda kernels for Ginkgo? */ -#cmakedefine GINKGO_BUILD_CUDA - - -/* Should we compile hip kernels for Ginkgo? */ -#cmakedefine GINKGO_BUILD_HIP - - /* What is HIP compiled for, hcc or nvcc? */ // clang-format off #define GINKGO_HIP_PLATFORM_HCC @GINKGO_HIP_PLATFORM_HCC@ diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index 981191ba73c..5eaf892789d 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -52,10 +52,10 @@ class HipExecutor; /** - * NvidiaDevice handles the number of executor on Nvidia devices and have the + * nvidia_device handles the number of executor on Nvidia devices and have the * corresponding recursive_mutex. */ -class NvidiaDevice { +class nvidia_device { friend class CudaExecutor; friend class HipExecutor; @@ -83,10 +83,10 @@ class NvidiaDevice { /** - * AmdDevice handles the number of executor on Amd devices and have the + * amd_device handles the number of executor on Amd devices and have the * corresponding recursive_mutex. */ -class AmdDevice { +class amd_device { friend class HipExecutor; private: