From ddb19336ad7d85c1242e6d3096ce501b45f0b4fe Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 13:58:57 +0200 Subject: [PATCH 01/59] Add MPI layer CMake setup --- CMakeLists.txt | 31 +++++++++++++++++++++ cmake/autodetect_executors.cmake | 9 ++++++ cmake/create_test.cmake | 27 ++++++++++++++++++ cmake/get_info.cmake | 6 +++- core/CMakeLists.txt | 7 ++++- core/device_hooks/CMakeLists.txt | 9 ++++++ devices/CMakeLists.txt | 1 + devices/mpi/CMakeLists.txt | 2 ++ mpi/CMakeLists.txt | 31 +++++++++++++++++++++ mpi/base/version.cpp | 48 ++++++++++++++++++++++++++++++++ mpi/get_info.cmake | 17 +++++++++++ 11 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 devices/mpi/CMakeLists.txt create mode 100644 mpi/CMakeLists.txt create mode 100644 mpi/base/version.cpp create mode 100644 mpi/get_info.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c04338bff88..f8295bd0adc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ option(GINKGO_BUILD_EXAMPLES "Build Ginkgo's examples" ON) option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) +option(GINKGO_BUILD_MPI "Compile the MPI module" ${GINKGO_HAS_MPI}) option(GINKGO_BUILD_DPCPP "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) @@ -189,6 +190,33 @@ else() message(STATUS "HWLOC is being forcibly switched off") endif() +set(GINKGO_HAVE_MPI 0) +set(GINKGO_HAVE_CUDA_AWARE_MPI 0) +if(GINKGO_BUILD_MPI) + find_package(MPI REQUIRED) + set(GINKGO_HAVE_MPI 1) + if(GINKGO_BUILD_CUDA AND (${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "") ) + enable_language(CUDA) + try_run(GKO_CUDA_AWARE_RUN_STATUS GKO_CUDA_AWARE_COMPILE_STATUS + "${CMAKE_BINARY_DIR}" SOURCES "${CMAKE_SOURCE_DIR}/mpi/test/cuda-aware-mpi-test.cu" + COMPILE_DEFINITIONS "" + LINK_LIBRARIES MPI::MPI_C + RUN_OUTPUT_VARIABLE GKO_CUDA_AWARE_RUN_OUT + COMPILE_OUTPUT_VARIABLE GKO_CUDA_AWARE_COMPILE_OUT) + if( ${GKO_CUDA_AWARE_RUN_STATUS} MATCHES "FAILED_TO_RUN" ) + message(STATUS "MPI does not support CUDA, disabling CUDA-Aware features, everything will be staged through the host.") + else() + message(STATUS "MPI supports CUDA, enabling CUDA-Aware features") + set(GINKGO_HAVE_CUDA_AWARE_MPI 1) + endif() + endif() + if( ${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "YES" ) + set(GINKGO_HAVE_CUDA_AWARE_MPI 1) + elseif( ${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "NO" ) + set(GINKGO_HAVE_CUDA_AWARE_MPI 0) + endif() +endif() + # We keep using NVCC/HCC for consistency with previous releases even if AMD # updated everything to use NVIDIA/AMD in ROCM 4.1 set(GINKGO_HIP_PLATFORM_NVCC 0) @@ -261,6 +289,9 @@ endif() if (GINKGO_BUILD_OMP) add_subdirectory(omp) # High-performance omp kernels endif() +if(GINKGO_BUILD_MPI) + add_subdirectory(mpi) # The MPI module +endif() add_subdirectory(core) # Core Ginkgo types and top-level functions add_subdirectory(include) # Public API self-contained check if (GINKGO_BUILD_TESTS) diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 71e2456edbc..431b53cbd36 100644 --- a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -1,8 +1,10 @@ set(GINKGO_HAS_OMP OFF) +set(GINKGO_HAS_MPI OFF) set(GINKGO_HAS_CUDA OFF) set(GINKGO_HAS_DPCPP OFF) set(GINKGO_HAS_HIP OFF) find_package(OpenMP 3.0) +find_package(MPI) include(CheckLanguage) 
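The try_run() check above compiles and executes mpi/test/cuda-aware-mpi-test.cu, a file that is referenced here but not part of this excerpt. A minimal sketch of such a probe, assuming only the MPI and CUDA runtime APIs (the real file may differ), hands a device pointer straight to a collective; the call only succeeds when the MPI library is CUDA-aware, so a crash maps to the FAILED_TO_RUN branch above:

    #include <mpi.h>

    #include <cuda_runtime.h>

    int main(int argc, char* argv[])
    {
        MPI_Init(&argc, &argv);
        int* device_buf = nullptr;
        cudaMalloc(&device_buf, sizeof(int));
        // A device pointer is only a valid MPI buffer with CUDA-aware MPI;
        // otherwise this call errors out or crashes the probe.
        MPI_Allreduce(MPI_IN_PLACE, device_buf, 1, MPI_INT, MPI_SUM,
                      MPI_COMM_WORLD);
        cudaFree(device_buf);
        MPI_Finalize();
        return 0;
    }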
check_language(CUDA) try_compile(GKO_CAN_COMPILE_DPCPP ${PROJECT_BINARY_DIR}/dpcpp @@ -16,6 +18,13 @@ if(OpenMP_CXX_FOUND) set(GINKGO_HAS_OMP ON) endif() +if(MPI_FOUND) + if(NOT DEFINED GINKGO_BUILD_MPI) + message(STATUS "Enabling MPI executor") + endif() + set(GINKGO_HAS_MPI ON) +endif() + if(CMAKE_CUDA_COMPILER) if(NOT DEFINED GINKGO_BUILD_CUDA) message(STATUS "Enabling CUDA executor") diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 01f30eb949d..b5df8320e63 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -60,6 +60,33 @@ function(ginkgo_create_thread_test test_name) ginkgo_set_test_target_properties(${test_name} ${test_target_name}) endfunction(ginkgo_create_thread_test) +function(ginkgo_create_mpi_test test_name num_mpi_procs) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") + add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) + target_include_directories("${TEST_TARGET_NAME}" + PRIVATE + "$" + ${MPI_INCLUDE_PATH} + ) + set_target_properties(${TEST_TARGET_NAME} PROPERTIES + OUTPUT_NAME ${test_name}) + if (GINKGO_CHECK_CIRCULAR_DEPS) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") + endif() + if("${GINKGO_MPI_EXEC_SUFFIX}" MATCHES ".openmpi" AND MPI_RUN_AS_ROOT) + set(OPENMPI_RUN_AS_ROOT_FLAG "--allow-run-as-root") + else() + set(OPENMPI_RUN_AS_ROOT_FLAG "") + endif() + target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE ${MPI_C_LIBRARIES} ${MPI_CXX_LIBRARIES}) + set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) + add_test(NAME ${REL_BINARY_DIR}/${test_name} + COMMAND ${MPIEXEC_EXECUTABLE} ${test_param} ) +endfunction(ginkgo_create_mpi_test) + function(ginkgo_create_test_cpp_cuda_header test_name) ginkgo_build_test_name(${test_name} test_target_name) add_executable(${test_target_name} ${test_name}.cpp) diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 99c387d30d3..ec25986f150 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -127,7 +127,7 @@ foreach(log_type ${log_types}) ginkgo_print_module_footer(${${log_type}} "User configuration:") ginkgo_print_module_footer(${${log_type}} " Enabled modules:") ginkgo_print_foreach_variable(${${log_type}} - "GINKGO_BUILD_OMP;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") + "GINKGO_BUILD_OMP;GINKGO_BUILD_MPI;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") ginkgo_print_module_footer(${${log_type}} " Enabled features:") ginkgo_print_foreach_variable(${${log_type}} "GINKGO_MIXED_PRECISION") @@ -155,6 +155,10 @@ IF(GINKGO_BUILD_OMP) include(omp/get_info.cmake) ENDIF() +IF(GINKGO_BUILD_MPI) + include(mpi/get_info.cmake) +ENDIF() + IF(GINKGO_BUILD_CUDA) include(cuda/get_info.cmake) ENDIF() diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index df018dc063d..fe731282302 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -60,6 +60,11 @@ if(GINKGO_HAVE_PAPI_SDE) target_sources(ginkgo PRIVATE log/papi.cpp) endif() +if (GINKGO_BUILD_MPI) + target_link_libraries(ginkgo PUBLIC "${MPI_C_LIBRARIES}" "${MPI_CXX_LIBRARIES}") + target_include_directories(ginkgo SYSTEM PUBLIC ${MPI_INCLUDE_PATH}) +endif() + ginkgo_compile_features(ginkgo) target_compile_options(ginkgo PRIVATE 
"${GINKGO_COMPILER_FLAGS}") @@ -67,7 +72,7 @@ target_compile_options(ginkgo PRIVATE "${GINKGO_COMPILER_FLAGS}") # regardless of whether it is installed or added as a subdirectory add_library(Ginkgo::ginkgo ALIAS ginkgo) target_link_libraries(ginkgo - PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) + PUBLIC ginkgo_device ginkgo_omp ginkgo_mpi ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) # The PAPI dependency needs to be exposed to the user. set(GKO_RPATH_ADDITIONS "") if (GINKGO_HAVE_PAPI_SDE) diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index fcb370a81a0..1c658e73172 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -41,6 +41,15 @@ if (NOT GINKGO_BUILD_OMP) ginkgo_install_library(ginkgo_omp) endif() +if(NOT GINKGO_BUILD_MPI) + add_library(ginkgo_mpi + $ + mpi_hooks.cpp) + ginkgo_compile_features(ginkgo_mpi) + ginkgo_default_includes(ginkgo_mpi) + ginkgo_install_library(ginkgo_mpi mpi) +endif() + if (NOT GINKGO_BUILD_REFERENCE) add_library(ginkgo_reference $ diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 09797aafe49..0f86fabfef5 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -23,4 +23,5 @@ add_subdirectory(cuda) add_subdirectory(dpcpp) add_subdirectory(hip) add_subdirectory(omp) +add_subdirectory(mpi) add_subdirectory(reference) diff --git a/devices/mpi/CMakeLists.txt b/devices/mpi/CMakeLists.txt new file mode 100644 index 00000000000..e86b0e0325c --- /dev/null +++ b/devices/mpi/CMakeLists.txt @@ -0,0 +1,2 @@ +ginkgo_add_object_library(ginkgo_mpi_device + dummy.cpp) diff --git a/mpi/CMakeLists.txt b/mpi/CMakeLists.txt new file mode 100644 index 00000000000..b8a5f336598 --- /dev/null +++ b/mpi/CMakeLists.txt @@ -0,0 +1,31 @@ +find_package(MPI REQUIRED) + +add_library(ginkgo_mpi $ "") +target_sources(ginkgo_mpi + PRIVATE + base/exception.cpp + base/bindings.cpp + base/version.cpp + ) + +ginkgo_compile_features(ginkgo_mpi) +target_include_directories(ginkgo_mpi + SYSTEM PRIVATE ${MPI_INCLUDE_PATH}) +target_link_libraries(ginkgo_mpi PRIVATE "${MPI_C_LIBRARIES}" "${MPI_CXX_LIBRARIES}") +target_compile_options(ginkgo_mpi PRIVATE "${GINKGO_COMPILER_FLAGS}") + +ginkgo_default_includes(ginkgo_mpi) +ginkgo_install_library(ginkgo_mpi mpi) + +if (GINKGO_CHECK_CIRCULAR_DEPS) + ginkgo_check_headers(ginkgo_mpi) +endif() + +if(GINKGO_BUILD_TESTS) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test) + add_subdirectory(test) +endif() + +# Propagate some useful information +set(MPI_C_VERSION ${MPI_C_VERSION} PARENT_SCOPE) +set(MPI_C_LIBRARIES ${MPI_C_LIBRARIES} PARENT_SCOPE) diff --git a/mpi/base/version.cpp b/mpi/base/version.cpp new file mode 100644 index 00000000000..c3ee7ab7c78 --- /dev/null +++ b/mpi/base/version.cpp @@ -0,0 +1,48 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +namespace gko { + + +version version_info::get_mpi_version() noexcept +{ + // When compiling the module, the header version is the same as the library + // version. Mismatch between the header and the module versions may happen + // if using shared libraries from different versions of Ginkgo. + return version_info::get_header_version(); +} + + +} // namespace gko diff --git a/mpi/get_info.cmake b/mpi/get_info.cmake new file mode 100644 index 00000000000..ebcea60e4ae --- /dev/null +++ b/mpi/get_info.cmake @@ -0,0 +1,17 @@ +ginkgo_print_module_header(${detailed_log} "MPI") +ginkgo_print_variable(${detailed_log} "GINKGO_FORCE_CUDA_AWARE_MPI") +ginkgo_print_variable(${detailed_log} "GKO_CUDA_AWARE_RUN_STATUS") +ginkgo_print_variable(${detailed_log} "MPI_C_COMPILER") +ginkgo_print_variable(${detailed_log} "MPI_CXX_COMPILER") +ginkgo_print_variable(${detailed_log} "MPI_CXX_COMPILE_OPTIONS") +ginkgo_print_variable(${detailed_log} "MPI_CXX_LINK_FLAGS") +ginkgo_print_variable(${detailed_log} "MPI_CXX_LIB_NAMES") +ginkgo_print_variable(${detailed_log} "MPI_CXX_HEADER_DIR") +ginkgo_print_variable(${detailed_log} "MPI_mpi_LIBRARY") +ginkgo_print_variable(${detailed_log} "MPIEXEC_EXECUTABLE") +ginkgo_print_variable(${detailed_log} "MPIEXEC_MAX_NUMPROCS") +ginkgo_print_variable(${detailed_log} "MPIEXEC_NUMPROC_FLAG") +ginkgo_print_module_footer(${detailed_log} "MPI variables:") +ginkgo_print_variable(${detailed_log} "GINKGO_COMPILER_FLAGS") +ginkgo_print_variable(${detailed_log} "MPI_RUN_AS_ROOT") +ginkgo_print_module_footer(${detailed_log} "") From 7e1a4b9aa48ddb923c7363a3b5ba8c282c1a5552 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 14:00:09 +0200 Subject: [PATCH 02/59] Add basic classes and hooks --- core/device_hooks/mpi_hooks.cpp | 434 ++++++++++++++++++++++++++++ devices/mpi/dummy.cpp | 34 +++ include/ginkgo/core/base/mpi.hpp | 471 +++++++++++++++++++++++++++++++ 3 files changed, 939 insertions(+) create mode 100644 core/device_hooks/mpi_hooks.cpp create mode 100644 devices/mpi/dummy.cpp create mode 100644 include/ginkgo/core/base/mpi.hpp diff --git a/core/device_hooks/mpi_hooks.cpp b/core/device_hooks/mpi_hooks.cpp new file mode 100644 index 00000000000..cc9ad9566c8 --- /dev/null +++ b/core/device_hooks/mpi_hooks.cpp @@ -0,0 +1,434 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
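Patch 01's mpi/base/version.cpp above, together with the hook version added in this patch, lets an application detect at run time whether the MPI module was compiled in. A small sketch, assuming Ginkgo's existing version_info interface:

    #include <ginkgo/ginkgo.hpp>

    #include <iostream>

    int main()
    {
        // the hook below reports the tag "not compiled" when the MPI
        // module was disabled at configure time
        auto& info = gko::version_info::get();
        std::cout << "ginkgo_mpi version: " << info.mpi_version << std::endl;
        return 0;
    }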
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include +#include + + +namespace gko { + + +version version_info::get_mpi_version() noexcept +{ + // We just return 1.0.0 with a special "not compiled" tag in placeholder + // modules. + return {1, 0, 0, "not compiled"}; +} + + +std::string MpiError::get_error(int64) +{ + return "ginkgo MPI module is not compiled"; +} + + +namespace mpi { + + +bool init_finalize::is_finalized() GKO_NOT_COMPILED(mpi); + + +bool init_finalize::is_initialized() GKO_NOT_COMPILED(mpi); + + +init_finalize::init_finalize(int& argc, char**& argv, + const size_type num_threads) GKO_NOT_COMPILED(mpi); + + +init_finalize::~init_finalize() {} + + +communicator::communicator(const MPI_Comm& comm) GKO_NOT_COMPILED(mpi); + + +communicator::communicator() GKO_NOT_COMPILED(mpi); + + +communicator::communicator(const MPI_Comm& comm_in, int color, int key) + GKO_NOT_COMPILED(mpi); + + +communicator::~communicator() {} + +info::info() GKO_NOT_COMPILED(mpi); + +void info::add(std::string key, std::string value) GKO_NOT_COMPILED(mpi); + + +void info::remove(std::string key) GKO_NOT_COMPILED(mpi); + + +info::~info() {} + + +bool communicator::compare(const MPI_Comm& comm) const GKO_NOT_COMPILED(mpi); + + +template +window::window(ValueType* base, unsigned int size, + std::shared_ptr comm, + const int disp_unit, info input_info, + win_type create_type) GKO_NOT_COMPILED(mpi); + + +template +void window::fence(int assert) GKO_NOT_COMPILED(mpi); + + +template +void window::lock(int rank, int assert, lock_type lock_t) + GKO_NOT_COMPILED(mpi); + + +template +void window::unlock(int rank) GKO_NOT_COMPILED(mpi); + + +template +void window::lock_all(int assert) GKO_NOT_COMPILED(mpi); + + +template +void window::unlock_all() GKO_NOT_COMPILED(mpi); + + +template +void window::flush(int rank) GKO_NOT_COMPILED(mpi); + + +template +void window::flush_local(int rank) GKO_NOT_COMPILED(mpi); + + +template +void window::flush_all() GKO_NOT_COMPILED(mpi); + + +template +void window::flush_all_local() GKO_NOT_COMPILED(mpi); + + 
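Every hook in this file has the same shape: GKO_NOT_COMPILED(mpi) supplies the entire function body. Simplified from Ginkgo's existing exception helpers (the exact expansion may differ slightly), a hooked function effectively becomes:

    bool init_finalize::is_initialized()
    {
        throw ::gko::NotCompiled(__FILE__, __LINE__, __func__, "mpi");
    }

so an MPI-free build still links, and the error only surfaces if an MPI routine is actually called.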
+template +window::~window() +{} + + +MPI_Op create_operation( + const std::function func, + void* arg1, void* arg2, int* len, MPI_Datatype* type) GKO_NOT_COMPILED(mpi); + + +double get_walltime() GKO_NOT_COMPILED(mpi); + + +int get_my_rank(const communicator& comm) GKO_NOT_COMPILED(mpi); + + +int get_local_rank(const communicator& comm) GKO_NOT_COMPILED(mpi); + + +int get_num_ranks(const communicator& comm) GKO_NOT_COMPILED(mpi); + + +void synchronize(const communicator& comm) GKO_NOT_COMPILED(mpi); + + +void wait(std::shared_ptr req, std::shared_ptr status) + GKO_NOT_COMPILED(mpi); + + +template +void send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag, + std::shared_ptr req, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, + const int recv_tag, std::shared_ptr req, + std::shared_ptr status, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +template +void get(GetType* origin_buffer, const int origin_count, const int target_rank, + const unsigned int target_disp, const int target_count, + window& window, std::shared_ptr req) + GKO_NOT_COMPILED(mpi); + + +template +void broadcast(BroadcastType* buffer, int count, int root_rank, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, + op_type op_enum, int root_rank, std::shared_ptr req, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, + std::shared_ptr comm, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +template +void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, op_type op_enum, + std::shared_ptr comm, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int* recv_counts, + const int* displacements, int root_rank, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void all_gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void scatter(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void scatter(const SendType* send_buffer, const int* send_counts, + const int* displacements, RecvType* recv_buffer, + const int recv_count, int root_rank, + std::shared_ptr comm) GKO_NOT_COMPILED(mpi); + + +template +void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, + op_type op_enum, std::shared_ptr comm) + GKO_NOT_COMPILED(mpi); + + +template +void all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +template +void all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm, + 
std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +template +void all_to_all(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets, + const int stride, std::shared_ptr comm, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); + + +#define GKO_DECLARE_WINDOW(ValueType) class window + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_WINDOW); + + +#define GKO_DECLARE_SEND(SendType) \ + void send(const SendType* send_buffer, const int send_count, \ + const int destination_rank, const int send_tag, \ + std::shared_ptr req, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SEND); + + +#define GKO_DECLARE_RECV(RecvType) \ + void recv(RecvType* recv_buffer, const int recv_count, \ + const int source_rank, const int recv_tag, \ + std::shared_ptr req, std::shared_ptr status, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_RECV); + + +#define GKO_DECLARE_PUT(PutType) \ + void put(const PutType* origin_buffer, const int origin_count, \ + const int target_rank, const unsigned int target_disp, \ + const int target_count, window& window, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_PUT); + + +#define GKO_DECLARE_GET(GetType) \ + void get(GetType* origin_buffer, const int origin_count, \ + const int target_rank, const unsigned int target_disp, \ + const int target_count, window& window, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_GET); + + +#define GKO_DECLARE_BCAST(BroadcastType) \ + void broadcast(BroadcastType* buffer, int count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_BCAST); + + +#define GKO_DECLARE_REDUCE(ReduceType) \ + void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ + int count, op_type operation, int root_rank, \ + std::shared_ptr req, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_REDUCE); + + +#define GKO_DECLARE_ALLREDUCE1(ReduceType) \ + void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE1); + +#define GKO_DECLARE_ALLREDUCE2(ReduceType) \ + void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ + int count, op_type operation, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE2); + + +#define GKO_DECLARE_GATHER1(SendType, RecvType) \ + void gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER1); + + +#define GKO_DECLARE_GATHER2(SendType, RecvType) \ + void gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int* recv_counts, \ + const int* displacements, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER2); + + +#define GKO_DECLARE_ALLGATHER(SendType, RecvType) \ + void all_gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALLGATHER); + + +#define GKO_DECLARE_SCATTER1(SendType, RecvType) \ + void scatter(const SendType* send_buffer, const int send_count, \ + RecvType* 
recv_buffer, const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER1); + + +#define GKO_DECLARE_SCATTER2(SendType, RecvType) \ + void scatter(const SendType* send_buffer, const int* send_counts, \ + const int* displacements, RecvType* recv_buffer, \ + const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER2); + + +#define GKO_DECLARE_SCAN(ScanType) \ + void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ + op_type op_enum, std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); + + +#define GKO_DECLARE_ALL_TO_ALL1(RecvType) \ + void all_to_all(RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALL_TO_ALL1); + + +#define GKO_DECLARE_ALL_TO_ALL2(SendType, RecvType) \ + void all_to_all(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALL_TO_ALL2); + + +#define GKO_DECLARE_ALL_TO_ALL_V(SendType, RecvType) \ + void all_to_all(const SendType* send_buffer, const int* send_counts, \ + const int* send_offsets, RecvType* recv_buffer, \ + const int* recv_counts, const int* recv_offsets, \ + const int stride, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ALL_TO_ALL_V); + + +} // namespace mpi +} // namespace gko diff --git a/devices/mpi/dummy.cpp b/devices/mpi/dummy.cpp new file mode 100644 index 00000000000..14e18b6d1f9 --- /dev/null +++ b/devices/mpi/dummy.cpp @@ -0,0 +1,34 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +// Remove this file once there is at least one source file in +// ginkgo_mpi_device diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp new file mode 100644 index 00000000000..4df9ed9dd13 --- /dev/null +++ b/include/ginkgo/core/base/mpi.hpp @@ -0,0 +1,471 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_MPI_HPP_ +#define GKO_PUBLIC_CORE_BASE_MPI_HPP_ + + +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include + + +#if GKO_HAVE_MPI + +#include + +#endif + + +#ifndef MPI_VERSION + +using MPI_Comm = int; +using MPI_Status = int; +using MPI_Request = int; +using MPI_Datatype = int; +using MPI_Op = int; +using MPI_Win = int*; +using MPI_Info = int*; + +#ifndef MPI_COMM_WORLD +#define MPI_COMM_WORLD 0 +#endif +#ifndef MPI_COMM_SELF +#define MPI_COMM_SELF 0 +#endif +#ifndef MPI_COMM_NULL +#define MPI_COMM_NULL 0 +#endif +#ifndef MPI_WIN_NULL +#define MPI_WIN_NULL nullptr +#endif +#ifndef MPI_REQUEST_NULL +#define MPI_REQUEST_NULL 0 +#endif +#ifndef MPI_INFO_NULL +#define MPI_INFO_NULL nullptr +#endif +#ifndef MPI_MIN +#define MPI_MIN 0 +#endif +#ifndef MPI_MAX +#define MPI_MAX 0 +#endif +#ifndef MPI_SUM +#define MPI_SUM 0 +#endif +#endif + + +template +using array_manager = std::unique_ptr>; + + +namespace gko { +namespace mpi { + +enum class op_type { + sum = 1, + min = 2, + max = 3, + product = 4, + custom = 5, + logical_and = 6, + bitwise_and = 7, + logical_or = 8, + bitwise_or = 9, + logical_xor = 10, + bitwise_xor = 11, + max_val_and_loc = 12, + min_val_and_loc = 13 +}; + + +/* + * Class that allows an RAII of initialization and calls MPI_Finalize at the + * end of its scope. Therefore this must be called before any of the MPI + * functions. 
+ */
+class init_finalize {
+public:
+    init_finalize(int& argc, char**& argv, const size_type num_threads = 1);
+
+    init_finalize() = delete;
+
+    init_finalize(init_finalize& other) = default;
+
+    init_finalize& operator=(const init_finalize& other) = default;
+
+    init_finalize(init_finalize&& other) = default;
+
+    init_finalize& operator=(init_finalize&& other) = default;
+
+    static bool is_finalized();
+
+    static bool is_initialized();
+
+    ~init_finalize();
+
+private:
+    int num_args_;
+    int required_thread_support_;
+    int provided_thread_support_;
+    char** args_;
+};
+
+
+/**
+ * A class holding and operating on the MPI_Info object. Stores the key-value
+ * pairs as a map and provides methods to access these values with keys as
+ * strings.
+ */
+class info {
+public:
+    info();
+
+    info(MPI_Info input) { this->info_ = input; }
+
+    void remove(std::string key);
+
+    std::string& at(std::string& key) { return this->key_value_.at(key); }
+
+    void add(std::string key, std::string value);
+
+    MPI_Info get() { return this->info_; }
+
+    ~info();
+
+private:
+    std::map<std::string, std::string> key_value_;
+    MPI_Info info_;
+};
+
+
+/**
+ * A request class that manages an array of MPI_Request handles (one by
+ * default). The array is freed when the object goes out of scope.
+ */
+class request : public EnableSharedCreateMethod<request> {
+public:
+    request(const int size) : req_(new MPI_Request[size]) {}
+
+    request() : req_(new MPI_Request[1]) {}
+
+    ~request()
+    {
+        if (req_) delete[] req_;
+    }
+
+    MPI_Request* get_requests() const { return req_; }
+
+private:
+    MPI_Request* req_;
+};
+
+
+/**
+ * A status class that manages an array of MPI_Status objects (one by
+ * default). The array is freed when the object goes out of scope.
+ */
+class status : public EnableSharedCreateMethod<status> {
+public:
+    status(const int size) : status_(new MPI_Status[size]) {}
+
+    status() : status_(new MPI_Status[1]) {}
+
+    ~status()
+    {
+        if (status_) delete[] status_;
+    }
+
+    MPI_Status* get_statuses() const { return status_; }
+
+private:
+    MPI_Status* status_;
+};
+
+
+/**
+ * A communicator class that takes in the given communicator and duplicates it
+ * for our purposes. As the class or object goes out of scope, the communicator
+ * is freed.
+ */
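The classes above (and the communicator class that follows) are designed for RAII-style use. A minimal sketch of the intended pattern, assuming only this header:

    int main(int argc, char* argv[])
    {
        // MPI_Init_thread runs here; MPI_Finalize when `guard` leaves scope
        gko::mpi::init_finalize guard(argc, argv);
        auto comm = gko::mpi::communicator::create_world();
        auto req = gko::mpi::request::create();  // via EnableSharedCreateMethod
        // ... post a non-blocking operation that fills `req` ...
        gko::mpi::wait(req);
        gko::mpi::synchronize(*comm);
        return 0;
    }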
+class communicator : public EnableSharedCreateMethod<communicator> {
+public:
+    communicator(const MPI_Comm& comm);
+
+    communicator(const MPI_Comm& comm, int color, int key);
+
+    communicator();
+
+    communicator(communicator& other);
+
+    communicator& operator=(const communicator& other);
+
+    communicator(communicator&& other);
+
+    communicator& operator=(communicator&& other);
+
+    static MPI_Comm get_comm_world() { return MPI_COMM_WORLD; }
+
+    static std::shared_ptr<communicator> create_world()
+    {
+        return std::make_shared<communicator>(get_comm_world());
+    }
+
+    MPI_Comm get() const { return comm_; }
+
+    int size() const { return size_; }
+
+    int rank() const { return rank_; };
+
+    int local_rank() const { return local_rank_; };
+
+    bool compare(const MPI_Comm& other) const;
+
+    bool operator==(const communicator& rhs) { return compare(rhs.get()); }
+
+    ~communicator();
+
+private:
+    MPI_Comm comm_;
+    int size_{};
+    int rank_{};
+    int local_rank_{};
+};
+
+
+class mpi_type {
+public:
+    mpi_type(const int count, MPI_Datatype& old);
+    ~mpi_type();
+    const MPI_Datatype& get() const { return this->type_; }
+
+private:
+    MPI_Datatype type_{};
+};
+
+
+template <typename ValueType>
+class window {
+public:
+    enum class win_type { allocate = 1, create = 2, dynamic_create = 3 };
+    enum class lock_type { shared = 1, exclusive = 2 };
+
+    window() : window_(MPI_WIN_NULL) {}
+    window(window& other) = default;
+    window& operator=(const window& other) = default;
+    window(window&& other) = default;
+    window& operator=(window&& other) = default;
+
+    window(ValueType* base, unsigned int size,
+           std::shared_ptr<communicator> comm,
+           const int disp_unit = sizeof(ValueType),
+           info input_info = info(MPI_INFO_NULL),
+           win_type create_type = win_type::create);
+
+    MPI_Win get() { return this->window_; }
+
+    void fence(int assert = 0);
+
+    void lock(int rank, int assert = 0, lock_type lock_t = lock_type::shared);
+
+    void unlock(int rank);
+
+    void lock_all(int assert = 0);
+
+    void unlock_all();
+
+    void flush(int rank);
+
+    void flush_local(int rank);
+
+    void flush_all();
+
+    void flush_all_local();
+
+    ~window();
+
+private:
+    MPI_Win window_;
+};
+
+
+void synchronize(const communicator& comm = communicator::get_comm_world());
+
+
+void wait(std::shared_ptr<request> req, std::shared_ptr<status> status = {});
+
+
+double get_walltime();
+
+
+int get_my_rank(const communicator& comm = communicator::get_comm_world());
+
+
+int get_local_rank(const communicator& comm = communicator::get_comm_world());
+
+
+int get_num_ranks(const communicator& comm = communicator::get_comm_world());
+
+
+template <typename SendType>
+void send(const SendType* send_buffer, const int send_count,
+          const int destination_rank, const int send_tag,
+          std::shared_ptr<request> req = {},
+          std::shared_ptr<communicator> comm = {});
+
+
+template <typename RecvType>
+void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
+          const int recv_tag, std::shared_ptr<request> req = {},
+          std::shared_ptr<status> status = {},
+          std::shared_ptr<communicator> comm = {});
+
+
+template <typename PutType>
+void put(const PutType* origin_buffer, const int origin_count,
+         const int target_rank, const unsigned int target_disp,
+         const int target_count, window<PutType>& window,
+         std::shared_ptr<request> req = {});
+
+
+template <typename GetType>
+void get(GetType* origin_buffer, const int origin_count, const int target_rank,
+         const unsigned int target_disp, const int target_count,
+         window<GetType>& window, std::shared_ptr<request> req = {});
+
+
+template <typename BroadcastType>
+void broadcast(BroadcastType* buffer, int count, int root_rank,
+               std::shared_ptr<communicator> comm = {});
+
+
+template <typename ReduceType>
+void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
+            op_type op_enum, int root_rank,
std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void all_reduce(ReduceType* recv_buffer, int count, + op_type op_enum = op_type::sum, + std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, op_type op_enum = op_type::sum, + std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm = {}); + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int* recv_counts, + const int* displacements, int root_rank, + std::shared_ptr comm = {}); + + +template +void all_gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm = {}); + + +template +void scatter(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm = {}); + + +template +void scatter(const SendType* send_buffer, const int* send_counts, + const int* displacements, RecvType* recv_buffer, + const int recv_count, int root_rank, + std::shared_ptr comm = {}); + + +template +void all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count = {}, + std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void all_to_all(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets, + const int stride = 1, + std::shared_ptr comm = {}, + std::shared_ptr req = {}); + + +template +void scan(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, + op_type op_enum = op_type::sum, + std::shared_ptr comm = {}); + + +} // namespace mpi +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_MPI_HPP_ From a56a6ffe81b15a9d7fd078618c1121917270a012 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 14:00:36 +0200 Subject: [PATCH 03/59] Update with MPI version --- include/ginkgo/core/base/version.hpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/version.hpp b/include/ginkgo/core/base/version.hpp index 2d41e366c0d..6d1d5aa7510 100644 --- a/include/ginkgo/core/base/version.hpp +++ b/include/ginkgo/core/base/version.hpp @@ -219,6 +219,13 @@ class version_info { */ version dpcpp_version; + /** + * Contains version information of the MPI module. + * + * This is the version of the static/shared library called "ginkgo_mpi". 
+ */
+    version mpi_version;
+
 private:
     static constexpr version get_header_version() noexcept
     {
@@ -238,6 +245,8 @@ class version_info {
 
     static version get_dpcpp_version() noexcept;
 
+    static version get_mpi_version() noexcept;
+
     version_info()
         : header_version{get_header_version()},
          core_version{get_core_version()},
@@ -245,7 +254,8 @@ class version_info {
           omp_version{get_omp_version()},
           cuda_version{get_cuda_version()},
           hip_version{get_hip_version()},
-          dpcpp_version{get_dpcpp_version()}
+          dpcpp_version{get_dpcpp_version()},
+          mpi_version{get_mpi_version()}
     {}
 };
 
From 426e48b2ee2aac9c27028fb14129935408ea23ed Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Fri, 22 Oct 2021 14:00:50 +0200
Subject: [PATCH 04/59] Add an EnableSharedCreate interface

---
 .../ginkgo/core/base/polymorphic_object.hpp | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp
index 71741479211..95b4ce5bcc9 100644
--- a/include/ginkgo/core/base/polymorphic_object.hpp
+++ b/include/ginkgo/core/base/polymorphic_object.hpp
@@ -649,6 +649,26 @@ class EnableCreateMethod {
 };
 
 
+/**
+ * This mixin implements a static `create()` method on `ConcreteType` that
+ * dynamically allocates the memory, uses the passed-in arguments to construct
+ * the object, and returns a std::shared_ptr to such an object.
+ *
+ * @tparam ConcreteType  the concrete type for which `create()` is being
+ *                       implemented [CRTP parameter]
+ */
+template <typename ConcreteType>
+class EnableSharedCreateMethod {
+public:
+    template <typename... Args>
+    static std::shared_ptr<ConcreteType> create(Args&&... args)
+    {
+        return std::shared_ptr<ConcreteType>(
+            new ConcreteType(std::forward<Args>(args)...));
+    }
+};
+
+
 }  // namespace gko
 
From a76b72747f5bbd4b818433c92579abcbb1e65aa9 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Fri, 22 Oct 2021 14:01:09 +0200
Subject: [PATCH 05/59] Add new macros for instantiations

---
 include/ginkgo/core/base/types.hpp | 57 ++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp
index 9c6827c4a11..d30739d5161 100644
--- a/include/ginkgo/core/base/types.hpp
+++ b/include/ginkgo/core/base/types.hpp
@@ -616,6 +616,63 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x,
 #endif
 
 
+/**
+ * Instantiates a template for each value type pair compiled by Ginkgo.
+ *
+ * @param _macro A macro which expands the template instantiation
+ *               (not including the leading `template` specifier).
+ *               Should take two arguments, which are replaced by the
+ *               two value types of the pair.
+ */
+#define GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_PAIR(_macro)          \
+    template _macro(float, float);                                \
+    template _macro(double, double);                              \
+    template _macro(std::complex<float>, float);                  \
+    template _macro(std::complex<double>, double);                \
+    template _macro(std::complex<float>, std::complex<float>);    \
+    template _macro(std::complex<double>, std::complex<double>)
+
+
+/**
+ * Instantiates a template for each combined value and index type compiled by
+ * Ginkgo.
+ *
+ * @param _macro A macro which expands the template instantiation
+ *               (not including the leading `template` specifier).
+ *               Should take two arguments, which are replaced by the
+ *               value and index types.
+ */
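These macros (and the two defined just below) drive explicit template instantiation: the hook and binding translation units declare each symbol once through a GKO_DECLARE_* macro and then stamp it out for every supported type. For example, applying the POD variant to the window class declaration from patch 02 expands to one explicit instantiation per type:

    template class window<float>;
    template class window<double>;
    template class window<std::complex<float>>;
    template class window<std::complex<double>>;
    template class window<size_type>;
    template class window<bool>;
    template class window<int32>;
    template class window<int64>;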
+#define GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(_macro)  \
+    template _macro(char, char);                                        \
+    template _macro(int32, int32);                                      \
+    template _macro(int64, int64);                                      \
+    template _macro(unsigned int, unsigned int);                        \
+    template _macro(unsigned long, unsigned long);                      \
+    template _macro(float, float);                                      \
+    template _macro(double, double);                                    \
+    template _macro(long double, long double);                          \
+    template _macro(std::complex<float>, std::complex<float>);          \
+    template _macro(std::complex<double>, std::complex<double>)
+
+/**
+ * Instantiates a template for each POD type compiled by Ginkgo.
+ *
+ * @param _macro A macro which expands the template instantiation
+ *               (not including the leading `template` specifier).
+ *               Should take one argument, which is replaced by the
+ *               POD type.
+ */
+#define GKO_INSTANTIATE_FOR_EACH_POD_TYPE(_macro)  \
+    template _macro(float);                        \
+    template _macro(double);                       \
+    template _macro(std::complex<float>);          \
+    template _macro(std::complex<double>);         \
+    template _macro(size_type);                    \
+    template _macro(bool);                         \
+    template _macro(int32);                        \
+    template _macro(int64)
+
+
 /**
  * Instantiates a template for each normal type
  *
From 0e8a031611b11d5ff431b4c1664a360e1e1adca5 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Fri, 22 Oct 2021 14:01:32 +0200
Subject: [PATCH 06/59] Add exception handling and helpers

---
 include/ginkgo/core/base/exception.hpp        | 58 +++++++++++++++++++
 .../ginkgo/core/base/exception_helpers.hpp    | 47 +++++++++++++++
 mpi/base/exception.cpp                        | 55 ++++++++++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 mpi/base/exception.cpp

diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp
index 800e771fd14..8ff5f9e5e16 100644
--- a/include/ginkgo/core/base/exception.hpp
+++ b/include/ginkgo/core/base/exception.hpp
@@ -173,6 +173,64 @@ class NotSupported : public Error {
 };
 
 
+/**
+ * MpiError is thrown when an MPI routine returns a non-zero error code.
+ */
+class MpiError : public Error {
+public:
+    /**
+     * Initializes an MPI error.
+     * @param file The name of the offending source file
+     * @param line The source code line number where the error occurred
+     * @param func The name of the MPI routine that failed
+     * @param error_code The resulting MPI error code
+     */
+    MpiError(const std::string& file, int line, const std::string& func,
+             int64 error_code)
+        : Error(file, line, func + ": " + get_error(error_code))
+    {}
+
+private:
+    static std::string get_error(int64 error_code);
+};
+
+
+/**
+ * MpiInitialized is thrown when MPI has already been initialized.
+ * MPI_Init can only be called once in a program.
+ */
+class MpiInitialized : public Error {
+public:
+    /**
+     * Initializes an MPI error.
+     * @param file The name of the offending source file
+     * @param line The source code line number where the error occurred
+     * @param func The name of the MPI routine that failed
+     */
+    MpiInitialized(const std::string& file, int line, const std::string& func)
+        : Error(file, line, func)
+    {}
+};
+
+
+/**
+ * MpiFinalized is thrown when MPI has already been finalized.
+ * Any MPI routine that is called after MPI_Finalize throws this error.
+ */
+class MpiFinalized : public Error {
+public:
+    /**
+     * Initializes an MPI error.
+ * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The name of the MPI routine that failed + */ + MpiFinalized(const std::string& file, int line, const std::string& func) + : Error(file, line, func) + {} +}; + + /** * CudaError is thrown when a CUDA routine throws a non-zero error code. */ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 99f2757244d..82439d56a75 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -298,6 +298,39 @@ inline dim<2> get_size(const dim<2>& size) { return size; } } +/** + * Instantiates a MpiError. + * + * @param errcode The error code returned from the MPI routine. + */ +#define GKO_MPI_ERROR(_errcode) \ + ::gko::MpiError(__FILE__, __LINE__, __func__, _errcode) + +/** + * Throws when MPI has already been initialized. + * + */ +#define GKO_MPI_INITIALIZED \ + { \ + throw ::gko::MpiInitialized(__FILE__, __LINE__, __func__); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +/** + * Throws when MPI has already been finalized. + * + */ +#define GKO_MPI_FINALIZED \ + { \ + throw ::gko::MpiFinalized(__FILE__, __LINE__, __func__); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + /** * Instantiates a CudaError. * @@ -528,6 +561,20 @@ inline dim<2> get_size(const dim<2>& size) { return size; } } while (false) +/** + * Asserts that a MPI library call completed without errors. + * + * @param _mpi_call a library call expression + */ +#define GKO_ASSERT_NO_MPI_ERRORS(_mpi_call) \ + do { \ + auto _errcode = _mpi_call; \ + if (_errcode != MPI_SUCCESS) { \ + throw GKO_MPI_ERROR(_errcode); \ + } \ + } while (false) + + namespace detail { diff --git a/mpi/base/exception.cpp b/mpi/base/exception.cpp new file mode 100644 index 00000000000..1481486b3a5 --- /dev/null +++ b/mpi/base/exception.cpp @@ -0,0 +1,55 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include + +#include + + +#include + + +namespace gko { + + +std::string MpiError::get_error(int64 error_code) +{ + int len = MPI_MAX_ERROR_STRING; + char* error_string = new char[len]; + MPI_Error_string(error_code, error_string, &len); + std::string message = "MPI Error: " + std::string(error_string); + delete[] error_string; + return message; +} + +} // namespace gko From bdb5f874b8e547809c5fe800b0add4363ce836c4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 14:05:09 +0200 Subject: [PATCH 07/59] Add MPI bindings and binding helpers --- mpi/base/bindings.cpp | 794 ++++++++++++++++++++++++++++++++++++++++++ mpi/base/bindings.hpp | 516 +++++++++++++++++++++++++++ mpi/base/helpers.hpp | 161 +++++++++ 3 files changed, 1471 insertions(+) create mode 100644 mpi/base/bindings.cpp create mode 100644 mpi/base/bindings.hpp create mode 100644 mpi/base/helpers.hpp diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp new file mode 100644 index 00000000000..8832188e07a --- /dev/null +++ b/mpi/base/bindings.cpp @@ -0,0 +1,794 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
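The get_error() helper above is what GKO_ASSERT_NO_MPI_ERRORS from this patch ultimately feeds: the bindings added below wrap every raw MPI call so that a return value other than MPI_SUCCESS becomes a typed exception, along these lines:

    // illustrative use of the macro from exception_helpers.hpp
    int rank = 0;
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
    // on failure this throws gko::MpiError carrying the MPI_Error_string text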
+*************************************************************/ + + +#include +#include + + +#include "mpi/base/bindings.hpp" + + +#include +#include +#include + + +#include "mpi/base/helpers.hpp" + + +namespace gko { +namespace mpi { + + +bool init_finalize::is_initialized() +{ + int flag = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Initialized(&flag)); + return flag; +} + + +bool init_finalize::is_finalized() +{ + int flag = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Finalized(&flag)); + return flag; +} + + +init_finalize::init_finalize(int& argc, char**& argv, + const size_type num_threads) +{ + auto flag = is_initialized(); + if (!flag) { + this->required_thread_support_ = MPI_THREAD_SERIALIZED; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Init_thread(&argc, &argv, this->required_thread_support_, + &(this->provided_thread_support_))); + } else { + // GKO_MPI_INITIALIZED; + } +} + + +init_finalize::~init_finalize() +{ + auto flag = is_finalized(); + if (!flag) MPI_Finalize(); +} + + +mpi_type::mpi_type(const int count, MPI_Datatype& old) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_contiguous(count, old, &this->type_)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_commit(&this->type_)); +} + + +mpi_type::~mpi_type() { MPI_Type_free(&(this->type_)); } + + +communicator::communicator(const MPI_Comm& comm) +{ + this->comm_ = bindings::duplicate_comm(comm); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); +} + + +communicator::communicator(const MPI_Comm& comm_in, int color, int key) +{ + this->comm_ = bindings::create_comm(comm_in, color, key); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); +} + + +communicator::communicator() +{ + this->comm_ = MPI_COMM_NULL; + this->size_ = 0; + this->rank_ = -1; +} + + +communicator::communicator(communicator& other) +{ + this->comm_ = bindings::duplicate_comm(other.comm_); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); +} + + +communicator& communicator::operator=(const communicator& other) +{ + this->comm_ = bindings::duplicate_comm(other.comm_); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); + return *this; +} + + +communicator::communicator(communicator&& other) +{ + this->comm_ = bindings::duplicate_comm(other.comm_); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); + other.comm_ = MPI_COMM_NULL; + other.size_ = 0; + other.rank_ = -1; +} + + +communicator& communicator::operator=(communicator&& other) +{ + this->comm_ = bindings::duplicate_comm(other.comm_); + this->size_ = bindings::get_comm_size(this->comm_); + this->rank_ = bindings::get_comm_rank(this->comm_); + this->local_rank_ = bindings::get_local_rank(this->comm_); + other.size_ = 0; + other.rank_ = -1; + return *this; +} + + +communicator::~communicator() { bindings::free_comm(this->comm_); } + + +info::info() { bindings::create_info(&this->info_); } + + +void info::add(std::string key, std::string value) +{ + this->key_value_[key] = value; + bindings::add_info_key_value_pair(&this->info_, key.c_str(), value.c_str()); +} + + +void 
info::remove(std::string key) +{ + bindings::remove_info_key_value_pair(&this->info_, key.c_str()); +} + + +info::~info() +{ + if (this->info_ != MPI_INFO_NULL) bindings::free_info(&this->info_); +} + + +bool communicator::compare(const MPI_Comm& comm) const +{ + return bindings::compare_comm(this->comm_, comm); +} + + +template +window::window(ValueType* base, unsigned int size, + std::shared_ptr comm, + const int disp_unit, info input_info, + win_type create_type) +{ + if (create_type == win_type::create) { + bindings::create_window(base, size, disp_unit, input_info.get(), + comm->get(), &this->window_); + } else if (create_type == win_type::dynamic_create) { + bindings::create_dynamic_window(input_info.get(), comm->get(), + &this->window_); + } else if (create_type == win_type::allocate) { + bindings::allocate_window(size, disp_unit, input_info.get(), + comm->get(), base, &this->window_); + } else { + GKO_NOT_IMPLEMENTED; + } +} + + +template +void window::fence(int assert) +{ + bindings::fence_window(assert, &this->window_); +} + + +template +void window::lock(int rank, int assert, lock_type lock_t) +{ + if (lock_t == lock_type::shared) { + bindings::lock_window(MPI_LOCK_SHARED, rank, assert, &this->window_); + } else if (lock_t == lock_type::exclusive) { + bindings::lock_window(MPI_LOCK_EXCLUSIVE, rank, assert, &this->window_); + } else { + GKO_NOT_IMPLEMENTED; + } +} + + +template +void window::unlock(int rank) +{ + bindings::unlock_window(rank, &this->window_); +} + + +template +void window::lock_all(int assert) +{ + bindings::lock_all_windows(assert, &this->window_); +} + + +template +void window::unlock_all() +{ + bindings::unlock_all_windows(&this->window_); +} + + +template +void window::flush(int rank) +{ + bindings::flush_window(rank, &this->window_); +} + + +template +void window::flush_local(int rank) +{ + bindings::flush_local_window(rank, &this->window_); +} + + +template +void window::flush_all() +{ + bindings::flush_all_windows(&this->window_); +} + + +template +void window::flush_all_local() +{ + bindings::flush_all_local_windows(&this->window_); +} + + +template +window::~window() +{ + if (this->window_ && this->window_ != MPI_WIN_NULL) { + bindings::free_window(&this->window_); + } +} + + +MPI_Op create_operation( + const std::function func, + void* arg1, void* arg2, int* len, MPI_Datatype* type) +{ + MPI_Op operation; + bindings::create_op(func.target(), + true, &operation); + return operation; +} + + +double get_walltime() { return bindings::get_walltime(); } + + +int get_my_rank(const communicator& comm) +{ + return bindings::get_comm_rank(comm.get()); +} + + +int get_local_rank(const communicator& comm) +{ + return bindings::get_local_rank(comm.get()); +} + + +int get_num_ranks(const communicator& comm) +{ + return bindings::get_num_ranks(comm.get()); +} + + +void synchronize(const communicator& comm) { bindings::barrier(comm.get()); } + + +void wait(std::shared_ptr req, std::shared_ptr status) +{ + if (status.get()) { + bindings::wait(req->get_requests(), status->get_statuses()); + } else { + bindings::wait(req->get_requests(), MPI_STATUS_IGNORE); + } +} + + +template +void send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag, + std::shared_ptr req, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + if (!req.get()) { + bindings::send(send_buffer, send_count, send_type, destination_rank, + send_tag, + comm ? 
comm->get() : communicator::get_comm_world()); + } else { + bindings::i_send(send_buffer, send_count, send_type, destination_rank, + send_tag, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, + const int recv_tag, std::shared_ptr req, + std::shared_ptr status, + std::shared_ptr comm) +{ + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::recv(recv_buffer, recv_count, recv_type, source_rank, + recv_tag, + comm ? comm->get() : communicator::get_comm_world(), + MPI_STATUS_IGNORE); + } else { + bindings::i_recv(recv_buffer, recv_count, recv_type, source_rank, + recv_tag, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window, + std::shared_ptr req) +{ + auto put_type = helpers::get_mpi_type(origin_buffer[0]); + if (!req.get()) { + bindings::put(origin_buffer, origin_count, put_type, target_rank, + target_disp, target_count, put_type, window.get()); + } else { + bindings::req_put(origin_buffer, origin_count, put_type, target_rank, + target_disp, target_count, put_type, window.get(), + req->get_requests()); + } +} + + +template +void get(GetType* origin_buffer, const int origin_count, const int target_rank, + const unsigned int target_disp, const int target_count, + window& window, std::shared_ptr req) +{ + auto get_type = helpers::get_mpi_type(origin_buffer[0]); + if (!req.get()) { + bindings::get(origin_buffer, origin_count, get_type, target_rank, + target_disp, target_count, get_type, window.get()); + } else { + bindings::req_get(origin_buffer, origin_count, get_type, target_rank, + target_disp, target_count, get_type, window.get(), + req->get_requests()); + } +} + + +template +void broadcast(BroadcastType* buffer, int count, int root_rank, + std::shared_ptr comm) +{ + auto bcast_type = helpers::get_mpi_type(buffer[0]); + bindings::broadcast(buffer, count, bcast_type, root_rank, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, + op_type op_enum, int root_rank, + std::shared_ptr comm, + std::shared_ptr req) +{ + auto operation = helpers::get_operation(op_enum); + auto reduce_type = helpers::get_mpi_type(send_buffer[0]); + if (!req.get()) { + bindings::reduce(send_buffer, recv_buffer, count, reduce_type, + operation, root_rank, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_reduce(send_buffer, recv_buffer, count, reduce_type, + operation, root_rank, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, + std::shared_ptr comm, + std::shared_ptr req) +{ + auto operation = helpers::get_operation(op_enum); + auto reduce_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::all_reduce( + bindings::in_place(), recv_buffer, count, reduce_type, + operation, comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_reduce( + bindings::in_place(), recv_buffer, count, reduce_type, + operation, comm ? 
comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, op_type op_enum, + std::shared_ptr comm, + std::shared_ptr req) +{ + auto operation = helpers::get_operation(op_enum); + auto reduce_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::all_reduce( + send_buffer, recv_buffer, count, reduce_type, operation, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_reduce( + send_buffer, recv_buffer, count, reduce_type, operation, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::gather(send_buffer, send_count, send_type, recv_buffer, + recv_count, recv_type, root_rank, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int* recv_counts, + const int* displacements, int root_rank, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::gatherv(send_buffer, send_count, send_type, recv_buffer, + recv_counts, displacements, recv_type, root_rank, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void all_gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::all_gather(send_buffer, send_count, send_type, recv_buffer, + recv_count, recv_type, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void scatter(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::scatter(send_buffer, send_count, send_type, recv_buffer, + recv_count, recv_type, root_rank, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void scatter(const SendType* send_buffer, const int* send_counts, + const int* displacements, RecvType* recv_buffer, + const int recv_count, int root_rank, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::scatterv(send_buffer, send_counts, displacements, send_type, + recv_buffer, recv_count, recv_type, root_rank, + comm ? comm->get() : communicator::get_comm_world()); +} + + +template +void all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm, + std::shared_ptr req) +{ + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::all_to_all( + bindings::in_place(), recv_count, recv_type, recv_buffer, + recv_count, recv_type, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_to_all( + bindings::in_place(), recv_count, recv_type, recv_buffer, + recv_count, recv_type, + comm ? 
comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm, + std::shared_ptr req) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::all_to_all( + send_buffer, send_count, send_type, recv_buffer, + recv_count == 0 ? send_count : recv_count, recv_type, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_to_all( + send_buffer, send_count, send_type, recv_buffer, + recv_count == 0 ? send_count : recv_count, recv_type, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void all_to_all(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets, + const int stride, std::shared_ptr comm, + std::shared_ptr req) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + + // auto new_type = mpi_type(stride, send_type); + + if (!req.get()) { + bindings::all_to_all_v( + send_buffer, send_counts, send_offsets, send_type, recv_buffer, + recv_counts, recv_offsets, recv_type, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_to_all_v( + send_buffer, send_counts, send_offsets, send_type, recv_buffer, + recv_counts, recv_offsets, recv_type, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } +} + + +template +void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, + op_type op_enum, std::shared_ptr comm) +{ + auto operation = helpers::get_operation(op_enum); + auto scan_type = helpers::get_mpi_type(recv_buffer[0]); + bindings::scan(send_buffer, recv_buffer, count, scan_type, operation, + comm ? 
comm->get() : communicator::get_comm_world()); +} + + +#define GKO_DECLARE_WINDOW(ValueType) class window + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_WINDOW); + + +#define GKO_DECLARE_SEND(SendType) \ + void send(const SendType* send_buffer, const int send_count, \ + const int destination_rank, const int send_tag, \ + std::shared_ptr req, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SEND); + + +#define GKO_DECLARE_RECV(RecvType) \ + void recv(RecvType* recv_buffer, const int recv_count, \ + const int source_rank, const int recv_tag, \ + std::shared_ptr req, std::shared_ptr status, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_RECV); + + +#define GKO_DECLARE_PUT(PutType) \ + void put(const PutType* origin_buffer, const int origin_count, \ + const int target_rank, const unsigned int target_disp, \ + const int target_count, window& window, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_PUT); + + +#define GKO_DECLARE_GET(GetType) \ + void get(GetType* origin_buffer, const int origin_count, \ + const int target_rank, const unsigned int target_disp, \ + const int target_count, window& window, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_GET); + + +#define GKO_DECLARE_BCAST(BroadcastType) \ + void broadcast(BroadcastType* buffer, int count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_BCAST); + + +#define GKO_DECLARE_REDUCE(ReduceType) \ + void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ + int count, op_type operation, int root_rank, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_REDUCE); + + +#define GKO_DECLARE_ALLREDUCE1(ReduceType) \ + void all_reduce(ReduceType* recv_buffer, int count, op_type operation, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE1); + + +#define GKO_DECLARE_ALLREDUCE2(ReduceType) \ + void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ + int count, op_type operation, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE2); + + +#define GKO_DECLARE_GATHER1(SendType, RecvType) \ + void gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER1); + + +#define GKO_DECLARE_GATHER2(SendType, RecvType) \ + void gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int* recv_counts, \ + const int* displacements, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER2); + + +#define GKO_DECLARE_ALLGATHER(SendType, RecvType) \ + void all_gather(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALLGATHER); + + +#define GKO_DECLARE_SCATTER1(SendType, RecvType) \ + void scatter(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER1); + + +#define GKO_DECLARE_SCATTER2(SendType, RecvType) \ + void scatter(const SendType* send_buffer, const int* 
send_counts, \ + const int* displacements, RecvType* recv_buffer, \ + const int recv_count, int root_rank, \ + std::shared_ptr comm) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER2); + + +#define GKO_DECLARE_ALL_TO_ALL1(RecvType) \ + void all_to_all(RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALL_TO_ALL1); + + +#define GKO_DECLARE_ALL_TO_ALL2(SendType, RecvType) \ + void all_to_all(const SendType* send_buffer, const int send_count, \ + RecvType* recv_buffer, const int recv_count, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALL_TO_ALL2); + + +#define GKO_DECLARE_ALL_TO_ALL_V(SendType, RecvType) \ + void all_to_all(const SendType* send_buffer, const int* send_counts, \ + const int* send_offsets, RecvType* recv_buffer, \ + const int* recv_counts, const int* recv_offsets, \ + const int stride, \ + std::shared_ptr comm, \ + std::shared_ptr req) + +GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ALL_TO_ALL_V); + + +#define GKO_DECLARE_SCAN(ScanType) \ + void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ + op_type op_enum, std::shared_ptr comm) +GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); + + +} // namespace mpi +} // namespace gko diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp new file mode 100644 index 00000000000..8f035081b3d --- /dev/null +++ b/mpi/base/bindings.hpp @@ -0,0 +1,516 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_MPI_BINDINGS_HPP_ +#define GKO_MPI_BINDINGS_HPP_ + + +#include + + +#include + + +#include + + +namespace gko { +/** + * @brief The MPI namespace. + * + * @ingroup mpi + */ +namespace mpi { +/** + * @brief The bindings namespace. 
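+ *
+ * These are thin inline wrappers over the raw MPI C API: calls are checked
+ * with GKO_ASSERT_NO_MPI_ERRORS, and the wrappers take no ownership of the
+ * MPI handles they touch; lifetime management stays with the gko::mpi
+ * classes that call into them.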
+ * + * @ingroup bindings + */ +namespace bindings { + + +inline double get_walltime() { return MPI_Wtime(); } + + +inline int get_comm_size(const MPI_Comm& comm) +{ + int size = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm, &size)); + return size; +} + + +inline int get_comm_rank(const MPI_Comm& comm) +{ + int my_rank = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(comm, &my_rank)); + return my_rank; +} + + +inline int get_local_rank(const MPI_Comm& comm) +{ + MPI_Comm local_comm; + int rank; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, + MPI_INFO_NULL, &local_comm)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &rank)); + MPI_Comm_free(&local_comm); + return rank; +} + + +inline int get_num_ranks(const MPI_Comm& comm) +{ + int size = 1; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm, &size)); + return size; +} + + +inline void barrier(const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm)); +} + + +inline MPI_Comm create_comm(const MPI_Comm& comm_in, int color, int key) +{ + MPI_Comm comm_out; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm_in, color, key, &comm_out)); + return comm_out; +} + + +inline MPI_Comm duplicate_comm(const MPI_Comm& comm) +{ + MPI_Comm dup; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm, &dup)); + return dup; +} + + +inline bool compare_comm(const MPI_Comm& comm1, const MPI_Comm comm2) +{ + int flag; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(comm1, comm2, &flag)); + return flag; +} + + +inline void free_comm(MPI_Comm comm) +{ + if (comm && comm != MPI_COMM_NULL) { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(&comm)); + } +} + + +inline void create_window(void* base, unsigned int size, const int disp_unit, + MPI_Info info, const MPI_Comm comm, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Win_create(base, size, disp_unit, info, comm, win)); +} + + +inline void create_dynamic_window(MPI_Info info, const MPI_Comm comm, + MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create_dynamic(info, comm, win)); +} + + +inline void allocate_window(unsigned int size, const int disp_unit, + MPI_Info info, const MPI_Comm comm, void* base, + MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Win_allocate(size, disp_unit, info, comm, base, win)); +} + + +inline void free_window(MPI_Win* win) +{ + if (win) { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_free(win)); + } +} + + +inline void fence_window(int assert, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_fence(assert, *win)); +} + + +inline void lock_window(int lock_t, int rank, int assert, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock(lock_t, rank, assert, *win)); +} + + +inline void unlock_window(int rank, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, *win)); +} + + +inline void lock_all_windows(int assert, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, *win)); +} + + +inline void unlock_all_windows(MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(*win)); +} + + +inline void flush_window(int rank, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, *win)); +} + + +inline void flush_local_window(int rank, MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, *win)); +} + + +inline void flush_all_windows(MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(*win)); +} + + +inline void flush_all_local_windows(MPI_Win* win) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(*win)); +} + + +inline void create_info(MPI_Info* info) +{ + 
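+    // creates an empty info object; ownership passes to the caller, who has
+    // to release it again through free_info()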
GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_create(info)); +} + + +inline void add_info_key_value_pair(MPI_Info* info, const char* key, + const char* value) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_set(*info, key, value)); +} + + +inline void remove_info_key_value_pair(MPI_Info* info, const char* key) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_delete(*info, key)); +} + + +inline void free_info(MPI_Info* info) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_free(info)); +} + + +inline void create_op(MPI_User_function* func, int commute, MPI_Op* op) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Op_create(func, commute, op)); +} + + +inline void wait(MPI_Request* request, MPI_Status* status) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(request, status)); +} + +template +inline const T* in_place() +{ + return reinterpret_cast(MPI_IN_PLACE); +} + +inline void send(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, const int destination_rank, + const int send_tag, const MPI_Comm comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, send_type, + destination_rank, send_tag, comm)); +} + + +inline void recv(void* recv_buffer, const int recv_count, + MPI_Datatype& recv_type, const int source_rank, + const int recv_tag, const MPI_Comm comm, MPI_Status* status) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv(recv_buffer, recv_count, recv_type, + source_rank, recv_tag, comm, status)); +} + + +inline void i_send(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, const int destination_rank, + const int send_tag, const MPI_Comm comm, + MPI_Request* request) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, send_type, + destination_rank, send_tag, comm, + request)); +} + + +inline void i_recv(void* recv_buffer, const int recv_count, + MPI_Datatype& recv_type, const int source_rank, + const int recv_tag, const MPI_Comm comm, + MPI_Request* request) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, recv_type, + source_rank, recv_tag, comm, request)); +} + + +inline void put(const void* origin_buffer, const int origin_count, + const MPI_Datatype& origin_type, const int target_rank, + const unsigned int target_disp, const int target_count, + const MPI_Datatype& target_type, MPI_Win window) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Put(origin_buffer, origin_count, origin_type, + target_rank, target_disp, target_count, + target_type, window)); +} + + +inline void req_put(const void* origin_buffer, const int origin_count, + const MPI_Datatype& origin_type, const int target_rank, + const unsigned int target_disp, const int target_count, + const MPI_Datatype& target_type, MPI_Win window, + MPI_Request* request) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(origin_buffer, origin_count, origin_type, + target_rank, target_disp, target_count, + target_type, window, request)); +} + + +inline void get(void* origin_buffer, const int origin_count, + const MPI_Datatype& origin_type, const int target_rank, + const unsigned int target_disp, const int target_count, + const MPI_Datatype& target_type, MPI_Win window) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Get(origin_buffer, origin_count, origin_type, + target_rank, target_disp, target_count, + target_type, window)); +} + + +inline void req_get(void* origin_buffer, const int origin_count, + const MPI_Datatype& origin_type, const int target_rank, + const unsigned int target_disp, const int target_count, + const MPI_Datatype& target_type, MPI_Win window, + MPI_Request* request) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(origin_buffer, origin_count, origin_type, + 
target_rank, target_disp, target_count, + target_type, window, request)); +} + + +inline void broadcast(void* buffer, int count, MPI_Datatype& bcast_type, + int root_rank, const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Bcast(buffer, count, bcast_type, root_rank, comm)); +} + + +inline void reduce(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, int root_rank, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, + reduce_type, operation, root_rank, + comm)); +} + + +inline void all_reduce(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, + reduce_type, operation, comm)); +} + + +inline void i_reduce(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, int root_rank, + const MPI_Comm& comm, MPI_Request* requests) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count, + reduce_type, operation, root_rank, + comm, requests)); +} + + +inline void i_all_reduce(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, + const MPI_Comm& comm, MPI_Request* requests) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, + reduce_type, operation, comm, + requests)); +} + + +inline void gather(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int recv_count, MPI_Datatype& recv_type, int root, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather(send_buffer, send_count, send_type, + recv_buffer, recv_count, recv_type, + root, comm)); +} + + +inline void gatherv(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int* recv_counts, const int* displacements, + MPI_Datatype& recv_type, int root_rank, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Gatherv(send_buffer, send_count, send_type, recv_buffer, + recv_counts, displacements, recv_type, root_rank, comm)); +} + + +inline void all_gather(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int recv_count, MPI_Datatype& recv_type, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(send_buffer, send_count, send_type, + recv_buffer, recv_count, recv_type, + comm)); +} + + +inline void scatter(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int recv_count, MPI_Datatype& recv_type, int root, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(send_buffer, send_count, send_type, + recv_buffer, recv_count, recv_type, + root, comm)); +} + + +inline void scatterv(const void* send_buffer, const int* send_counts, + const int* displacements, MPI_Datatype& send_type, + void* recv_buffer, const int recv_count, + MPI_Datatype& recv_type, int root_rank, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Scatterv(send_buffer, send_counts, displacements, send_type, + recv_buffer, recv_count, recv_type, root_rank, comm)); +} + + +inline void all_to_all(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int recv_count, MPI_Datatype& recv_type, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(send_buffer, send_count, send_type, + recv_buffer, 
recv_count, recv_type, + comm)); +} + + +inline void i_all_to_all(const void* send_buffer, const int send_count, + MPI_Datatype& send_type, void* recv_buffer, + const int recv_count, MPI_Datatype& recv_type, + const MPI_Comm& comm, MPI_Request* requests) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(send_buffer, send_count, send_type, + recv_buffer, recv_count, recv_type, + comm, requests)); +} + + +inline void all_to_all_v(const void* send_buffer, const int* send_count, + const int* send_offsets, const MPI_Datatype& send_type, + void* recv_buffer, const int* recv_count, + const int* recv_offsets, const MPI_Datatype& recv_type, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Alltoallv(send_buffer, send_count, send_offsets, send_type, + recv_buffer, recv_count, recv_offsets, recv_type, comm)); +} + + +inline void i_all_to_all_v(const void* send_buffer, const int* send_count, + const int* send_offsets, + const MPI_Datatype& send_type, void* recv_buffer, + const int* recv_count, const int* recv_offsets, + const MPI_Datatype& recv_type, const MPI_Comm& comm, + MPI_Request* requests) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( + send_buffer, send_count, send_offsets, send_type, recv_buffer, + recv_count, recv_offsets, recv_type, comm, requests)); +} + + +inline void scan(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, + const MPI_Comm& comm) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, + reduce_type, operation, comm)); +} + + +} // namespace bindings +} // namespace mpi +} // namespace gko + + +#endif // GKO_MPI_BINDINGS_HPP_ diff --git a/mpi/base/helpers.hpp b/mpi/base/helpers.hpp new file mode 100644 index 00000000000..d569a568760 --- /dev/null +++ b/mpi/base/helpers.hpp @@ -0,0 +1,161 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/
+
+#ifndef GKO_MPI_HELPERS_HPP_
+#define GKO_MPI_HELPERS_HPP_
+
+
+#include
+
+
+#include
+
+
+#include
+
+
+namespace gko {
+/**
+ * @brief The MPI namespace.
+ *
+ * @ingroup mpi
+ */
+namespace mpi {
+/**
+ * @brief The helpers namespace.
+ *
+ * @ingroup helper
+ */
+namespace helpers {
+
+#define GKO_MPI_DATATYPE(BaseType, MPIType)                                  \
+    inline MPI_Datatype get_mpi_type(const BaseType&) { return MPIType; }    \
+    static_assert(true,                                                      \
+                  "This assert is used to counter the false positive extra " \
+                  "semi-colon warnings")
+
+
+GKO_MPI_DATATYPE(bool, MPI_C_BOOL);
+GKO_MPI_DATATYPE(char, MPI_CHAR);
+GKO_MPI_DATATYPE(unsigned char, MPI_UNSIGNED_CHAR);
+GKO_MPI_DATATYPE(unsigned, MPI_UNSIGNED);
+GKO_MPI_DATATYPE(int, MPI_INT);
+GKO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG);
+GKO_MPI_DATATYPE(unsigned short, MPI_UNSIGNED_SHORT);
+GKO_MPI_DATATYPE(long, MPI_LONG);
+GKO_MPI_DATATYPE(float, MPI_FLOAT);
+GKO_MPI_DATATYPE(double, MPI_DOUBLE);
+GKO_MPI_DATATYPE(long double, MPI_LONG_DOUBLE);
+GKO_MPI_DATATYPE(std::complex<float>, MPI_COMPLEX);
+GKO_MPI_DATATYPE(std::complex<double>, MPI_DOUBLE_COMPLEX);
+
+
+namespace detail {
+namespace operations {
+
+template <typename ValueType>
+void custom(void* in, void* inout, int* size, MPI_Datatype*)
+{
+    // sample user function for op_type::custom: an elementwise sum that
+    // combines the input buffer into the input/output buffer, as MPI_SUM
+    // would
+    auto l_in = reinterpret_cast<ValueType*>(in);
+    auto l_inout = reinterpret_cast<ValueType*>(inout);
+    for (auto i = 0; i < *size; ++i) {
+        l_inout[i] = l_in[i] + l_inout[i];
+    }
+}
+
+
+}  // namespace operations
+}  // namespace detail
+
+
+template <typename ValueType>
+MPI_Op get_operation(gko::mpi::op_type op)
+{
+    switch (op) {
+    case gko::mpi::op_type::sum:
+        return MPI_SUM;
+    case gko::mpi::op_type::min:
+        return MPI_MIN;
+    case gko::mpi::op_type::max:
+        return MPI_MAX;
+    case gko::mpi::op_type::product:
+        return MPI_PROD;
+    case gko::mpi::op_type::custom: {
+        // TEMPLATE to create custom operations; the caller is responsible
+        // for freeing the created operation with MPI_Op_free
+        MPI_Op custom_op;
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Op_create(
+            detail::operations::custom<ValueType>, true, &custom_op));
+        return custom_op;
+    }
+    case gko::mpi::op_type::logical_and:
+        return MPI_LAND;
+    case gko::mpi::op_type::bitwise_and:
+        return MPI_BAND;
+    case gko::mpi::op_type::logical_or:
+        return MPI_LOR;
+    case gko::mpi::op_type::bitwise_or:
+        return MPI_BOR;
+    case gko::mpi::op_type::logical_xor:
+        return MPI_LXOR;
+    case gko::mpi::op_type::bitwise_xor:
+        return MPI_BXOR;
+    case gko::mpi::op_type::max_val_and_loc:
+        return MPI_MAXLOC;
+    case gko::mpi::op_type::min_val_and_loc:
+        return MPI_MINLOC;
+    default:
+        GKO_NOT_SUPPORTED(op);
+    }
+}
+
+
+}  // namespace helpers
+}  // namespace mpi
+}  // namespace gko
+
+
+#endif  // GKO_MPI_HELPERS_HPP_

From 4f5db6b53aba910ad361ec1ae933482a67b8b2fc Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Fri, 22 Oct 2021 14:06:52 +0200
Subject: [PATCH 08/59] Add MPI tests and CUDA Aware check helper

---
 mpi/test/CMakeLists.txt             |   1 +
 mpi/test/base/CMakeLists.txt        |   3 +
 mpi/test/base/bindings.cpp          | 753 ++++++++++++++++++++++++++++
 mpi/test/base/communicator.cpp      | 224 +++++++++
 mpi/test/base/exception_helpers.cpp |  66 +++
 mpi/test/cuda-aware-mpi-test.cu     |  82 +++
 mpi/test/gtest-mpi-listener.hpp     | 588 ++++++++++++++++++++++
 mpi/test/gtest-mpi-main.hpp         |  52 ++
 8 files changed, 1769 insertions(+)
 create mode 100644 mpi/test/CMakeLists.txt
 create mode 100644 mpi/test/base/CMakeLists.txt
 create mode 100644 mpi/test/base/bindings.cpp
 create mode 100644 mpi/test/base/communicator.cpp
 create mode 100644 
mpi/test/base/exception_helpers.cpp create mode 100644 mpi/test/cuda-aware-mpi-test.cu create mode 100644 mpi/test/gtest-mpi-listener.hpp create mode 100644 mpi/test/gtest-mpi-main.hpp diff --git a/mpi/test/CMakeLists.txt b/mpi/test/CMakeLists.txt new file mode 100644 index 00000000000..1ad6a5575b2 --- /dev/null +++ b/mpi/test/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(base) diff --git a/mpi/test/base/CMakeLists.txt b/mpi/test/base/CMakeLists.txt new file mode 100644 index 00000000000..96be37342b5 --- /dev/null +++ b/mpi/test/base/CMakeLists.txt @@ -0,0 +1,3 @@ +ginkgo_create_mpi_test(communicator 8) +ginkgo_create_mpi_test(exception_helpers 2) +ginkgo_create_mpi_test(bindings 4) diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp new file mode 100644 index 00000000000..14ae9c3a598 --- /dev/null +++ b/mpi/test/base/bindings.cpp @@ -0,0 +1,753 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +#include +#include + + +#include + + +#include + + +#include "gtest-mpi-listener.hpp" +#include "gtest-mpi-main.hpp" + + +#include +#include +#include +#include +#include + +class MpiBindings : public ::testing::Test { +protected: + MpiBindings() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; + + void assert_equal_arrays(gko::Array& array_1, + gko::Array& array_2) + { + ASSERT_EQ(array_1.get_num_elems(), array_2.get_num_elems()); + for (gko::size_type i = 0; i < array_1.get_num_elems(); ++i) { + EXPECT_EQ(array_1.get_const_data()[i], array_2.get_const_data()[i]); + } + } +}; + + +TEST_F(MpiBindings, CanSetADefaultWindow) +{ + gko::mpi::window win; + ASSERT_EQ(win.get(), MPI_WIN_NULL); +} + + +TEST_F(MpiBindings, CanCreateWindow) +{ + using ValueType = int; + ValueType* data; + data = new ValueType[4]{1, 2, 3, 4}; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto win = gko::mpi::window(data, 4 * sizeof(ValueType), comm); + ASSERT_NE(win.get(), MPI_WIN_NULL); + win.lock_all(); + win.unlock_all(); + delete data; +} + + +TEST_F(MpiBindings, CanSendAndRecvValues) +{ + using ValueType = int; + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm); + auto num_ranks = gko::mpi::get_num_ranks(comm); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + ValueType* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + send_array = + gko::Array{ref, gko::Array(ref, 4, data)}; + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + gko::mpi::send(send_array.get_const_data(), 4, rank, + 40 + rank); + } + } + } else { + recv_array = gko::Array{ref, 4}; + gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank); + } + if (my_rank != 0) { + ASSERT_EQ(recv_array.get_data()[0], 1); + ASSERT_EQ(recv_array.get_data()[1], 2); + ASSERT_EQ(recv_array.get_data()[2], 3); + ASSERT_EQ(recv_array.get_data()[3], 4); + } +} + + +TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) +{ + using ValueType = int; + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm); + auto num_ranks = gko::mpi::get_num_ranks(comm); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + ValueType* data; + auto req = gko::mpi::request::create(num_ranks); + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + send_array = + gko::Array{ref, gko::Array(ref, 4, data)}; + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + gko::mpi::send(send_array.get_data(), 4, rank, + 40 + rank, req); + } + } + } else { + recv_array = gko::Array{ref, 4}; + gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, + req); + } + gko::mpi::wait(req); + if (my_rank != 0) { + ASSERT_EQ(recv_array.get_data()[0], 1); + ASSERT_EQ(recv_array.get_data()[1], 2); + ASSERT_EQ(recv_array.get_data()[2], 3); + ASSERT_EQ(recv_array.get_data()[3], 4); + } +} + + +TEST_F(MpiBindings, CanPutValuesWithLockAll) +{ + using ValueType = int; + using window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = window(data, 4 * sizeof(ValueType), comm); + win.lock_all(); + if (my_rank == 0) { + for (auto rank = 0; rank < num_ranks; ++rank) { + if 
(rank != my_rank) { + gko::mpi::put(data, 4, rank, 0, 4, win); + win.flush(rank); + } + } + } + win.unlock_all(); + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanPutValuesWithExclusiveLock) +{ + using ValueType = int; + using window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = window(data, 4 * sizeof(ValueType), comm); + if (my_rank == 0) { + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + win.lock(rank, 0, window::lock_type::exclusive); + gko::mpi::put(data, 4, rank, 0, 4, win); + win.flush(rank); + win.unlock(rank); + } + } + } + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanPutValuesWithFence) +{ + using ValueType = int; + using window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = window(data, 4 * sizeof(ValueType), comm); + win.fence(); + if (my_rank == 0) { + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + gko::mpi::put(data, 4, rank, 0, 4, win); + } + } + } + win.fence(); + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanGetValuesWithLockAll) +{ + using ValueType = int; + using Window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = Window(data, 4 * sizeof(ValueType), comm); + if (my_rank != 0) { + win.lock_all(); + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + gko::mpi::get(data, 4, 0, 0, 4, win); + win.flush(0); + } + } + win.unlock_all(); + } + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanGetValuesWithExclusiveLock) +{ + using ValueType = int; + using Window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = Window(data, 4 * sizeof(ValueType), comm); + if (my_rank != 0) { + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + win.lock(0, 0, Window::lock_type::exclusive); + gko::mpi::get(data, 4, 0, 0, 4, win); + win.flush(0); + win.unlock(0); + } + } + } + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + 
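+    // on every rank, the buffer now matches rank 0's original values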
ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanGetValuesWithFence) +{ + using ValueType = int; + using Window = gko::mpi::window; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + int* data; + if (my_rank == 0) { + data = new ValueType[4]{1, 2, 3, 4}; + } else { + data = new ValueType[4]{0, 0, 0, 0}; + } + auto win = Window(data, 4 * sizeof(ValueType), comm); + win.fence(); + if (my_rank != 0) { + for (auto rank = 0; rank < num_ranks; ++rank) { + if (rank != my_rank) { + gko::mpi::get(data, 4, 0, 0, 4, win); + } + } + } + win.fence(); + gko::mpi::synchronize(); + ASSERT_EQ(data[0], 1); + ASSERT_EQ(data[1], 2); + ASSERT_EQ(data[2], 3); + ASSERT_EQ(data[3], 4); + delete data; +} + + +TEST_F(MpiBindings, CanBroadcastValues) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + double* data; + auto array = gko::Array{ref, 8}; + if (my_rank == 0) { + // clang-format off + data = new double[8]{ 2.0, 3.0, 1.0, + 3.0,-1.0, 0.0 , 3.5, 1.5}; + // clang-format on + array = gko::Array{gko::Array::view(ref, 8, data)}; + } + gko::mpi::broadcast(array.get_data(), 8, 0); + auto comp_data = array.get_data(); + ASSERT_EQ(comp_data[0], 2.0); + ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[2], 1.0); + ASSERT_EQ(comp_data[3], 3.0); + ASSERT_EQ(comp_data[4], -1.0); + ASSERT_EQ(comp_data[5], 0.0); + ASSERT_EQ(comp_data[6], 3.5); + ASSERT_EQ(comp_data[7], 1.5); + if (my_rank == 0) { + delete data; + } +} + + +TEST_F(MpiBindings, CanReduceValues) +{ + using ValueType = double; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + ValueType data, sum, max, min; + if (my_rank == 0) { + data = 3; + } else if (my_rank == 1) { + data = 5; + } else if (my_rank == 2) { + data = 2; + } else if (my_rank == 3) { + data = 6; + } + gko::mpi::reduce(&data, &sum, 1, gko::mpi::op_type::sum, 0); + gko::mpi::reduce(&data, &max, 1, gko::mpi::op_type::max, 0); + gko::mpi::reduce(&data, &min, 1, gko::mpi::op_type::min, 0); + if (my_rank == 0) { + EXPECT_EQ(sum, 16.0); + EXPECT_EQ(max, 6.0); + EXPECT_EQ(min, 2.0); + } +} + + +TEST_F(MpiBindings, CanAllReduceValues) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + int data, sum; + if (my_rank == 0) { + data = 3; + } else if (my_rank == 1) { + data = 5; + } else if (my_rank == 2) { + data = 2; + } else if (my_rank == 3) { + data = 6; + } + gko::mpi::all_reduce(&data, &sum, 1, gko::mpi::op_type::sum); + ASSERT_EQ(sum, 16); +} + + +TEST_F(MpiBindings, CanAllReduceValuesInPlace) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + int data; + if (my_rank == 0) { + data = 3; + } else if (my_rank == 1) { + data = 5; + } else if (my_rank == 2) { + data = 2; + } else if (my_rank == 3) { + data = 6; + } + gko::mpi::all_reduce(&data, 1, gko::mpi::op_type::sum); + ASSERT_EQ(data, 16); +} + + +TEST_F(MpiBindings, CanScatterValues) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = 
gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + double* data; + auto scatter_from_array = gko::Array{ref->get_master()}; + if (my_rank == 0) { + // clang-format off + data = new double[8]{ 2.0, 3.0, 1.0, + 3.0,-1.0, 0.0 , 3.5, 1.5}; + // clang-format on + scatter_from_array = gko::Array{ + ref->get_master(), gko::Array::view(ref, 8, data)}; + } + auto scatter_into_array = gko::Array{ref, 2}; + gko::mpi::scatter(scatter_from_array.get_data(), 2, + scatter_into_array.get_data(), 2, 0); + auto comp_data = scatter_into_array.get_data(); + if (my_rank == 0) { + ASSERT_EQ(comp_data[0], 2.0); + ASSERT_EQ(comp_data[1], 3.0); + delete data; + } else if (my_rank == 1) { + ASSERT_EQ(comp_data[0], 1.0); + ASSERT_EQ(comp_data[1], 3.0); + } else if (my_rank == 2) { + ASSERT_EQ(comp_data[0], -1.0); + ASSERT_EQ(comp_data[1], 0.0); + } else if (my_rank == 3) { + ASSERT_EQ(comp_data[0], 3.5); + ASSERT_EQ(comp_data[1], 1.5); + } +} + + +TEST_F(MpiBindings, CanGatherValues) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + int data; + if (my_rank == 0) { + data = 3; + } else if (my_rank == 1) { + data = 5; + } else if (my_rank == 2) { + data = 2; + } else if (my_rank == 3) { + data = 6; + } + auto gather_array = + gko::Array{ref, static_cast(num_ranks)}; + gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0); + if (my_rank == 0) { + ASSERT_EQ(gather_array.get_data()[0], 3); + ASSERT_EQ(gather_array.get_data()[1], 5); + ASSERT_EQ(gather_array.get_data()[2], 2); + ASSERT_EQ(gather_array.get_data()[3], 6); + } +} + + +TEST_F(MpiBindings, CanScatterValuesWithDisplacements) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + double* data; + auto scatter_from_array = gko::Array{ref}; + auto scatter_into_array = gko::Array{ref}; + auto s_counts = gko::Array{ref->get_master(), + static_cast(num_ranks)}; + auto displacements = gko::Array{ref->get_master()}; + int nelems; + if (my_rank == 0) { + // clang-format off + data = new double[10]{ 2.0, 3.0, 1.0, + 3.0,-1.0, 0.0, + 2.5,-1.5, 0.5, 3.5}; + // clang-format on + scatter_from_array = + gko::Array{ref, gko::Array::view(ref, 10, data)}; + nelems = 2; + displacements = gko::Array{ref, {0, 2, 6, 9}}; + } else if (my_rank == 1) { + nelems = 4; + } else if (my_rank == 2) { + nelems = 3; + } else if (my_rank == 3) { + nelems = 1; + } + scatter_into_array = + gko::Array{ref, static_cast(nelems)}; + gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0); + gko::mpi::scatter( + scatter_from_array.get_data(), s_counts.get_data(), + displacements.get_data(), scatter_into_array.get_data(), nelems, 0); + auto comp_data = scatter_into_array.get_data(); + if (my_rank == 0) { + ASSERT_EQ(comp_data[0], 2.0); + ASSERT_EQ(comp_data[1], 3.0); + delete data; + } else if (my_rank == 1) { + ASSERT_EQ(comp_data[0], 1.0); + ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[2], -1.0); + ASSERT_EQ(comp_data[3], 0.0); + } else if (my_rank == 2) { + ASSERT_EQ(comp_data[0], 2.5); + ASSERT_EQ(comp_data[1], -1.5); + ASSERT_EQ(comp_data[2], 0.5); + } else if (my_rank == 3) { + ASSERT_EQ(comp_data[0], 3.5); + } +} + + +TEST_F(MpiBindings, CanGatherValuesWithDisplacements) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto 
num_ranks = gko::mpi::get_num_ranks(comm->get()); + double* data; + auto gather_from_array = gko::Array{ref}; + auto gather_into_array = gko::Array{ref}; + auto r_counts = gko::Array{ref->get_master(), + static_cast(num_ranks)}; + auto displacements = gko::Array{ref->get_master()}; + int nelems; + if (my_rank == 0) { + data = new double[2]{2.0, 3.0}; + gather_from_array = gko::Array{ + ref->get_master(), + gko::Array::view(ref->get_master(), 2, data)}; + nelems = 2; + displacements = gko::Array{ref->get_master(), {0, 2, 6, 7}}; + gather_into_array = gko::Array{ref, 10}; + } else if (my_rank == 1) { + data = new double[4]{1.5, 2.0, 1.0, 0.5}; + nelems = 4; + gather_from_array = gko::Array{ + ref->get_master(), + gko::Array::view(ref->get_master(), 4, data)}; + } else if (my_rank == 2) { + data = new double[1]{1.0}; + nelems = 1; + gather_from_array = gko::Array{ + ref->get_master(), + gko::Array::view(ref->get_master(), 1, data)}; + } else if (my_rank == 3) { + data = new double[3]{1.9, -4.0, 5.0}; + nelems = 3; + gather_from_array = gko::Array{ + ref->get_master(), + gko::Array::view(ref->get_master(), 3, data)}; + } + + gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0); + gko::mpi::gather( + gather_from_array.get_data(), nelems, gather_into_array.get_data(), + r_counts.get_data(), displacements.get_data(), 0); + auto comp_data = gather_into_array.get_data(); + if (my_rank == 0) { + ASSERT_EQ(comp_data[0], 2.0); + ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[2], 1.5); + ASSERT_EQ(comp_data[3], 2.0); + ASSERT_EQ(comp_data[4], 1.0); + ASSERT_EQ(comp_data[5], 0.5); + ASSERT_EQ(comp_data[6], 1.0); + ASSERT_EQ(comp_data[7], 1.9); + ASSERT_EQ(comp_data[8], -4.0); + ASSERT_EQ(comp_data[9], 5.0); + } else { + ASSERT_EQ(comp_data, nullptr); + } + delete data; +} + + +TEST_F(MpiBindings, AllToAllWorksCorrectly) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + auto ref_array = gko::Array{ref}; + recv_array = gko::Array{ref, 4}; + if (my_rank == 0) { + send_array = gko::Array(ref, {2.5, 3.0, 1.5, 2.0}); + ref_array = gko::Array(ref, {2.5, 2.5, 2.0, 5.5}); + } else if (my_rank == 1) { + send_array = gko::Array(ref, {2.5, 3.5, 1.0, 2.0}); + ref_array = gko::Array(ref, {3.0, 3.5, 3.0, 3.5}); + } else if (my_rank == 2) { + send_array = gko::Array(ref, {2.0, 3.0, 1.5, 0.0}); + ref_array = gko::Array(ref, {1.5, 1.0, 1.5, 3.5}); + } else if (my_rank == 3) { + send_array = gko::Array(ref, {5.5, 3.5, 3.5, -2.0}); + ref_array = gko::Array(ref, {2.0, 2.0, 0.0, -2.0}); + } + + gko::mpi::all_to_all(send_array.get_data(), 1, + recv_array.get_data()); + this->assert_equal_arrays(recv_array, ref_array); +} + + +TEST_F(MpiBindings, AllToAllInPlaceWorksCorrectly) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto recv_array = gko::Array{ref}; + auto ref_array = gko::Array{ref}; + recv_array = gko::Array{ref, 4}; + if (my_rank == 0) { + recv_array = gko::Array(ref, {2.5, 3.0, 1.5, 2.0}); + ref_array = gko::Array(ref, {2.5, 2.5, 2.0, 5.5}); + } else if (my_rank == 1) { + recv_array = gko::Array(ref, {2.5, 3.5, 1.0, 2.0}); + ref_array = gko::Array(ref, {3.0, 3.5, 3.0, 3.5}); + } else if (my_rank == 2) { + recv_array = gko::Array(ref, {2.0, 3.0, 1.5, 0.0}); + ref_array = 
gko::Array(ref, {1.5, 1.0, 1.5, 3.5}); + } else if (my_rank == 3) { + recv_array = gko::Array(ref, {5.5, 3.5, 3.5, -2.0}); + ref_array = gko::Array(ref, {2.0, 2.0, 0.0, -2.0}); + } + + gko::mpi::all_to_all(recv_array.get_data(), 1); + this->assert_equal_arrays(recv_array, ref_array); +} + + +TEST_F(MpiBindings, AllToAllVWorksCorrectly) +{ + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto send_array = gko::Array{ref}; + auto recv_array = gko::Array{ref}; + auto ref_array = gko::Array{ref}; + auto scounts_array = gko::Array{ref}; + auto soffset_array = gko::Array{ref}; + auto rcounts_array = gko::Array{ref}; + auto roffset_array = gko::Array{ref}; + if (my_rank == 0) { + recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; + send_array = gko::Array{ref, {2.5, 3.0, 1.5, 2.0}}; + scounts_array = gko::Array{ref, {1, 2, 1, 0}}; + rcounts_array = gko::Array{ref, {1, 2, 2, 1}}; + soffset_array = gko::Array{ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{ref, {0, 1, 3, 5}}; + ref_array = gko::Array{ref, {2.5, 2.5, 3.5, 1.5, 2.4, 5.5}}; + } else if (my_rank == 1) { + recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; + send_array = gko::Array{ref, {2.5, 3.5, 1.0, 2.0}}; + scounts_array = gko::Array{ref, {2, 2, 1, 2}}; + rcounts_array = gko::Array{ref, {2, 2, 2, 0}}; + soffset_array = gko::Array{ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{ref, {0, 2, 4, 5}}; + ref_array = gko::Array{ref, {3.0, 1.5, 3.5, 1.0, 3.0, 1.5}}; + } else if (my_rank == 2) { + recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0}}; + send_array = gko::Array{ref, {2.0, 3.0, 1.5, 2.4}}; + scounts_array = gko::Array{ref, {2, 2, 1, 1}}; + rcounts_array = gko::Array{ref, {1, 1, 1, 1}}; + soffset_array = gko::Array{ref, {2, 1, 1, 1}}; + roffset_array = gko::Array{ref, {0, 1, 2, 3}}; + ref_array = gko::Array{ref, {3.0, 3.5, 3.0, 3.5}}; + } else if (my_rank == 3) { + recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0}}; + send_array = gko::Array{ref, {5.5, 3.5, 3.5, -2.0}}; + scounts_array = gko::Array{ref, {1, 0, 1, 0}}; + rcounts_array = gko::Array{ref, {0, 2, 1, 0}}; + soffset_array = gko::Array{ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{ref, {0, 1, 3, 3}}; + ref_array = gko::Array{ref, {0.0, 2.5, 3.5, 3.0}}; + } + + gko::mpi::all_to_all( + send_array.get_data(), scounts_array.get_data(), + soffset_array.get_data(), recv_array.get_data(), + rcounts_array.get_data(), roffset_array.get_data()); + this->assert_equal_arrays(recv_array, ref_array); +} + + +// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for +// more details. +GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/mpi/test/base/communicator.cpp b/mpi/test/base/communicator.cpp new file mode 100644 index 00000000000..92b343e5adf --- /dev/null +++ b/mpi/test/base/communicator.cpp @@ -0,0 +1,224 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include + +#include + +#include "gtest-mpi-listener.hpp" +#include "gtest-mpi-main.hpp" + + +#include +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class Communicator : public ::testing::Test { +protected: + Communicator() : comm(MPI_COMM_WORLD) {} + + void SetUp() + { + rank = gko::mpi::get_my_rank(comm); + ASSERT_EQ(gko::mpi::get_num_ranks(comm), 8); + } + + gko::mpi::communicator comm; + int rank; +}; + + +TEST_F(Communicator, DefaultCommIsInvalid) +{ + auto comm = gko::mpi::communicator(); + + EXPECT_EQ(comm.get(), MPI_COMM_NULL); +} + + +TEST_F(Communicator, CanCreateWorld) +{ + auto comm = gko::mpi::communicator::create_world(); + + EXPECT_EQ(comm->compare(MPI_COMM_WORLD), true); +} + + +TEST_F(Communicator, KnowsItsCommunicator) +{ + MPI_Comm dup; + MPI_Comm_dup(MPI_COMM_WORLD, &dup); + auto comm_world = gko::mpi::communicator(dup); + + EXPECT_EQ(comm_world.compare(dup), true); +} + + +TEST_F(Communicator, CommunicatorCanBeCopied) +{ + auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); + auto copy = comm_world; + + EXPECT_EQ(comm_world.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); +} + + +TEST_F(Communicator, CommunicatorCanBeCopyConstructed) +{ + auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); + auto copy = gko::mpi::communicator(comm_world); + + EXPECT_EQ(comm_world.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); +} + + +TEST_F(Communicator, CommunicatorCanBeMoved) +{ + int size; + auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); + + auto moved = std::move(comm_world); + + MPI_Comm_size(MPI_COMM_WORLD, &size); + EXPECT_EQ(comm_world.get(), MPI_COMM_NULL); + EXPECT_EQ(comm_world.size(), 0); + EXPECT_EQ(moved.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(moved.size(), size); +} + + +TEST_F(Communicator, CommunicatorCanBeMoveConstructed) +{ + int size; + auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); + + auto moved = gko::mpi::communicator(std::move(comm_world)); + + MPI_Comm_size(MPI_COMM_WORLD, &size); + EXPECT_EQ(comm_world.get(), MPI_COMM_NULL); + EXPECT_EQ(comm_world.size(), 0); + EXPECT_EQ(moved.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(moved.size(), size); +} + + +TEST_F(Communicator, CommKnowsItsSize) +{ + int size; 
+ MPI_Comm_size(MPI_COMM_WORLD, &size); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + + EXPECT_EQ(comm.size(), size); +} + + +TEST_F(Communicator, KnowsItsSize) +{ + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + + EXPECT_EQ(gko::mpi::get_num_ranks(MPI_COMM_WORLD), size); +} + + +TEST_F(Communicator, CommKnowsItsRank) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + + EXPECT_EQ(comm.rank(), rank); +} + + +TEST_F(Communicator, CommKnowsItsLocalRank) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + + // Expect local rank to be same as rank when on one node + EXPECT_EQ(comm.local_rank(), rank); +} + + +TEST_F(Communicator, KnowsItsRanks) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + EXPECT_EQ(rank, gko::mpi::get_my_rank(MPI_COMM_WORLD)); +} + + +TEST_F(Communicator, KnowsItsDefaultCommunicator) +{ + auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); + ASSERT_TRUE(comm_world == comm); +} + + +TEST_F(Communicator, KnowsNumRanks) +{ + EXPECT_EQ(gko::mpi::get_num_ranks(comm), 8); +} + + +TEST_F(Communicator, CanSetCustomCommunicator) +{ + auto world_rank = gko::mpi::get_my_rank(comm); + auto world_size = gko::mpi::get_num_ranks(comm); + auto color = world_rank / 4; + + auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); + for (auto i = 0; i < world_size; ++i) { + EXPECT_LT(gko::mpi::get_my_rank(row_comm.get()), 4); + } +} + + +} // namespace + +// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for +// more details. +GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/mpi/test/base/exception_helpers.cpp b/mpi/test/base/exception_helpers.cpp new file mode 100644 index 00000000000..f0878b2081f --- /dev/null +++ b/mpi/test/base/exception_helpers.cpp @@ -0,0 +1,66 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + +#include "gtest-mpi-listener.hpp" +#include "gtest-mpi-main.hpp" + + +#include +#include + + +namespace { + + +TEST(AssertNoMpiErrors, ThrowsOnError) +{ + ASSERT_THROW(GKO_ASSERT_NO_MPI_ERRORS(1), gko::MpiError); +} + + +TEST(AssertNoMpiErrors, DoesNotThrowOnSuccess) +{ + ASSERT_NO_THROW(GKO_ASSERT_NO_MPI_ERRORS(MPI_SUCCESS)); +} + + +} // namespace + + +// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for +// more details. +GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/mpi/test/cuda-aware-mpi-test.cu b/mpi/test/cuda-aware-mpi-test.cu new file mode 100644 index 00000000000..51ea1960d93 --- /dev/null +++ b/mpi/test/cuda-aware-mpi-test.cu @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +#include +#include + +#include +#include +#include + + +int main(int argc, char* argv[]) +{ + int num_cuda_devices = 0; + cudaGetDeviceCount(&num_cuda_devices); + if (num_cuda_devices < 1) std::exit(-1); + MPI_Init(&argc, &argv); + int rank = 0; + int size = 0; + MPI_Comm_size(MPI_COMM_WORLD, &size); + assert(size > 1); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + cudaSetDevice(rank); + int* d_buf; + int* buf; + unsigned long len = 10; + buf = (int*)malloc(sizeof(int) * len); + for (int i = 0; i < len; ++i) { + buf[i] = (i + 1) * (rank + 1); + } + cudaMalloc(&d_buf, sizeof(int) * len); + cudaMemcpy(d_buf, buf, sizeof(int) * len, cudaMemcpyHostToDevice); + if (rank == 0) { + MPI_Send(d_buf, len, MPI_INT, 1, 12, MPI_COMM_WORLD); + } else { + MPI_Status status; + MPI_Recv(d_buf, len, MPI_INT, 0, 12, MPI_COMM_WORLD, &status); + for (int i = 0; i < len; ++i) { + bool flag = (buf[i] == (i + 1) * 2); + if (!flag) std::exit(-1); + } + cudaMemcpy(buf, d_buf, sizeof(int) * len, cudaMemcpyDeviceToHost); + for (int i = 0; i < len; ++i) { + bool flag = (buf[i] == (i + 1)); + if (!flag) std::exit(-1); + } + } + cudaFree(d_buf); + free(buf); + MPI_Finalize(); + return 0; +} diff --git a/mpi/test/gtest-mpi-listener.hpp b/mpi/test/gtest-mpi-listener.hpp new file mode 100644 index 00000000000..d1135c37bbc --- /dev/null +++ b/mpi/test/gtest-mpi-listener.hpp @@ -0,0 +1,588 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +/****************************************************************************** + * + * Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC + * and other gtest-mpi-listener developers. See the COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + *******************************************************************************/ +// +/******************************************************************************* + * An example from Google Test was copied with minor modifications. The + * license of Google Test is below. + * + * Google Test has the following copyright notice, which must be + * duplicated in its entirety per the terms of its license: + * + * Copyright 2005, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *******************************************************************************/ + +#ifndef GTEST_MPI_MINIMAL_LISTENER_H +#define GTEST_MPI_MINIMAL_LISTENER_H + +#include +#include +#include +#include +#include "gtest/gtest.h" +#include "mpi.h" + + +namespace GTestMPIListener { + +// This class sets up the global test environment, which is needed +// to finalize MPI. 
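+// A minimal usage sketch, assuming the test binary initializes gtest and MPI
+// itself (the GKO_DECLARE_GTEST_MPI_MAIN macro further below does exactly
+// this):
+//
+//     ::testing::InitGoogleTest(&argc, argv);
+//     MPI_Init(&argc, &argv);
+//     ::testing::AddGlobalTestEnvironment(
+//         new GTestMPIListener::MPIEnvironment);
+//     int result = RUN_ALL_TESTS();  // TearDown() finalizes MPI afterwards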
+class MPIEnvironment : public ::testing::Environment { +public: + MPIEnvironment() : ::testing::Environment() {} + + virtual ~MPIEnvironment() {} + + virtual void SetUp() + { + int is_mpi_initialized; + ASSERT_EQ(MPI_Initialized(&is_mpi_initialized), MPI_SUCCESS); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + FAIL(); + } + } + + virtual void TearDown() + { + int is_mpi_finalized; + ASSERT_EQ(MPI_Finalized(&is_mpi_finalized), MPI_SUCCESS); + if (!is_mpi_finalized) { + int rank; + ASSERT_EQ(MPI_Comm_rank(MPI_COMM_WORLD, &rank), MPI_SUCCESS); + if (rank == 0) { + printf("Finalizing MPI...\n"); + } + ASSERT_EQ(MPI_Finalize(), MPI_SUCCESS); + } + ASSERT_EQ(MPI_Finalized(&is_mpi_finalized), MPI_SUCCESS); + ASSERT_TRUE(is_mpi_finalized); + } + +private: + // Disallow copying + MPIEnvironment(const MPIEnvironment& env) {} + +}; // class MPIEnvironment + +// This class more or less takes the code in Google Test's +// MinimalistPrinter example and wraps certain parts of it in MPI calls, +// gathering all results onto rank zero. +class MPIMinimalistPrinter : public ::testing::EmptyTestEventListener { +public: + MPIMinimalistPrinter() + : ::testing::EmptyTestEventListener(), result_vector() + { + int is_mpi_initialized; + MPI_Initialized(&is_mpi_initialized); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + assert(0); + } + MPI_Comm_dup(MPI_COMM_WORLD, &comm); + UpdateCommState(); + } + + MPIMinimalistPrinter(MPI_Comm comm_) + : ::testing::EmptyTestEventListener(), result_vector() + { + int is_mpi_initialized; + MPI_Initialized(&is_mpi_initialized); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + assert(0); + } + + MPI_Comm_dup(comm_, &comm); + UpdateCommState(); + } + + MPIMinimalistPrinter(const MPIMinimalistPrinter& printer) + { + int is_mpi_initialized; + MPI_Initialized(&is_mpi_initialized); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + assert(0); + } + + MPI_Comm_dup(printer.comm, &comm); + UpdateCommState(); + result_vector = printer.result_vector; + } + + // Called before the Environment is torn down. + void OnEnvironmentTearDownStart() + { + int is_mpi_finalized; + ASSERT_EQ(MPI_Finalized(&is_mpi_finalized), MPI_SUCCESS); + if (!is_mpi_finalized) { + MPI_Comm_free(&comm); + } + } + + // Called before a test starts. + virtual void OnTestStart(const ::testing::TestInfo& test_info) + { + // Only need to report test start info on rank 0 + if (rank == 0) { + printf("*** Test %s.%s starting.\n", test_info.test_case_name(), + test_info.name()); + } + } + + // Called after an assertion failure or an explicit SUCCESS() macro. + // In an MPI program, this means that certain ranks may not call this + // function if a test part does not fail on all ranks. Consequently, it + // is difficult to have explicit synchronization points here. 
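+    // For example, an EXPECT_EQ(rank, 0) fails only on the non-zero ranks,
+    // so the number of recorded results can differ between ranks; OnTestEnd
+    // below therefore gathers the per-rank result counts before exchanging
+    // any payloads.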
+ virtual void OnTestPartResult( + const ::testing::TestPartResult& test_part_result) + { + result_vector.push_back(test_part_result); + } + + // Called after a test ends. + virtual void OnTestEnd(const ::testing::TestInfo& test_info) + { + int localResultCount = result_vector.size(); + std::vector resultCountOnRank(size, 0); + MPI_Gather(&localResultCount, 1, MPI_INT, &resultCountOnRank[0], 1, + MPI_INT, 0, comm); + + if (rank != 0) { + // Nonzero ranks send constituent parts of each result to rank 0 + for (int i = 0; i < localResultCount; i++) { + const ::testing::TestPartResult test_part_result = + result_vector.at(i); + int resultStatus(test_part_result.failed()); + std::string resultFileName(test_part_result.file_name()); + int resultLineNumber(test_part_result.line_number()); + std::string resultSummary(test_part_result.summary()); + + // Must add one for null termination + int resultFileNameSize(resultFileName.size() + 1); + int resultSummarySize(resultSummary.size() + 1); + + MPI_Send(&resultStatus, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultFileNameSize, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultLineNumber, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultSummarySize, 1, MPI_INT, 0, rank, comm); + MPI_Send(resultFileName.c_str(), resultFileNameSize, MPI_CHAR, + 0, rank, comm); + MPI_Send(resultSummary.c_str(), resultSummarySize, MPI_CHAR, 0, + rank, comm); + } + } else { + // Rank 0 first prints its local result data + for (int i = 0; i < localResultCount; i++) { + const ::testing::TestPartResult test_part_result = + result_vector.at(i); + printf(" %s on rank %d, %s:%d\n%s\n", + test_part_result.failed() ? "*** Failure" : "Success", + rank, test_part_result.file_name(), + test_part_result.line_number(), + test_part_result.summary()); + } + + for (int r = 1; r < size; r++) { + for (int i = 0; i < resultCountOnRank[r]; i++) { + int resultStatus, resultFileNameSize, resultLineNumber; + int resultSummarySize; + MPI_Recv(&resultStatus, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultFileNameSize, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultLineNumber, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultSummarySize, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + + std::string resultFileName; + std::string resultSummary; + resultFileName.resize(resultFileNameSize); + resultSummary.resize(resultSummarySize); + MPI_Recv(&resultFileName[0], resultFileNameSize, MPI_CHAR, + r, r, comm, MPI_STATUS_IGNORE); + MPI_Recv(&resultSummary[0], resultSummarySize, MPI_CHAR, r, + r, comm, MPI_STATUS_IGNORE); + + printf(" %s on rank %d, %s:%d\n%s\n", + resultStatus ? "*** Failure" : "Success", r, + resultFileName.c_str(), resultLineNumber, + resultSummary.c_str()); + } + } + + printf("*** Test %s.%s ending.\n", test_info.test_case_name(), + test_info.name()); + } + + result_vector.clear(); + } + +private: + MPI_Comm comm; + int rank; + int size; + std::vector<::testing::TestPartResult> result_vector; + + int UpdateCommState() + { + int flag = MPI_Comm_rank(comm, &rank); + if (flag != MPI_SUCCESS) { + return flag; + } + flag = MPI_Comm_size(comm, &size); + return flag; + } + +}; // class MPIMinimalistPrinter + +// This class more or less takes the code in Google Test's +// MinimalistPrinter example and wraps certain parts of it in MPI calls, +// gathering all results onto rank zero. 
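+// Both printers forward results to rank zero with the same length-prefixed
+// wire format: for every test part result, a non-zero rank sends four MPI_INT
+// values (failed flag, file name size, line number, summary/message size)
+// followed by the two MPI_CHAR payloads, all tagged with the sender's rank,
+// e.g.
+//
+//     MPI_Send(&resultStatus, 1, MPI_INT, 0, rank, comm);
+//     MPI_Send(resultFileName.c_str(), resultFileNameSize, MPI_CHAR, 0, rank,
+//              comm);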
+class MPIWrapperPrinter : public ::testing::TestEventListener { +public: + MPIWrapperPrinter(::testing::TestEventListener* l, MPI_Comm comm_) + : ::testing::TestEventListener(), listener(l), result_vector() + { + int is_mpi_initialized; + MPI_Initialized(&is_mpi_initialized); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + assert(0); + } + + MPI_Comm_dup(comm_, &comm); + UpdateCommState(); + } + + MPIWrapperPrinter(const MPIWrapperPrinter& printer) + : listener(printer.listener), result_vector(printer.result_vector) + { + int is_mpi_initialized; + MPI_Initialized(&is_mpi_initialized); + if (!is_mpi_initialized) { + printf("MPI must be initialized before RUN_ALL_TESTS!\n"); + printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); + printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); + assert(0); + } + + MPI_Comm_dup(printer.comm, &comm); + UpdateCommState(); + } + + // Called before test activity starts + virtual void OnTestProgramStart(const ::testing::UnitTest& unit_test) + { + if (rank == 0) { + listener->OnTestProgramStart(unit_test); + } + } + + + // Called before each test iteration starts, where iteration is + // the iterate index. There could be more than one iteration if + // GTEST_FLAG(repeat) is used. + virtual void OnTestIterationStart(const ::testing::UnitTest& unit_test, + int iteration) + { + if (rank == 0) { + listener->OnTestIterationStart(unit_test, iteration); + } + } + + + // Called before environment setup before start of each test iteration + virtual void OnEnvironmentsSetUpStart(const ::testing::UnitTest& unit_test) + { + if (rank == 0) { + listener->OnEnvironmentsSetUpStart(unit_test); + } + } + + virtual void OnEnvironmentsSetUpEnd(const ::testing::UnitTest& unit_test) + { + if (rank == 0) { + listener->OnEnvironmentsSetUpEnd(unit_test); + } + } + +#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_ + virtual void OnTestCaseStart(const ::testing::TestCase& test_case) + { + if (rank == 0) { + listener->OnTestCaseStart(test_case); + } + } +#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_ + + // Called before a test starts. + virtual void OnTestStart(const ::testing::TestInfo& test_info) + { + // Only need to report test start info on rank 0 + if (rank == 0) { + listener->OnTestStart(test_info); + } + } + + // Called after an assertion failure or an explicit SUCCESS() macro. + // In an MPI program, this means that certain ranks may not call this + // function if a test part does not fail on all ranks. Consequently, it + // is difficult to have explicit synchronization points here. + virtual void OnTestPartResult( + const ::testing::TestPartResult& test_part_result) + { + result_vector.push_back(test_part_result); + if (rank == 0) { + listener->OnTestPartResult(test_part_result); + } + } + + // Called after a test ends. 
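+    // Unlike MPIMinimalistPrinter::OnTestEnd, the sizes exchanged here do
+    // not include a null terminator: rank 0 rebuilds the strings from
+    // explicitly sized character buffers instead of from C strings.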
+ virtual void OnTestEnd(const ::testing::TestInfo& test_info) + { + int localResultCount = result_vector.size(); + std::vector resultCountOnRank(size, 0); + MPI_Gather(&localResultCount, 1, MPI_INT, &resultCountOnRank[0], 1, + MPI_INT, 0, comm); + + if (rank != 0) { + // Nonzero ranks send constituent parts of each result to rank 0 + for (int i = 0; i < localResultCount; i++) { + const ::testing::TestPartResult test_part_result = + result_vector.at(i); + int resultStatus(test_part_result.failed()); + std::string resultFileName(test_part_result.file_name()); + int resultLineNumber(test_part_result.line_number()); + std::string resultMessage(test_part_result.message()); + + int resultFileNameSize(resultFileName.size()); + int resultMessageSize(resultMessage.size()); + + MPI_Send(&resultStatus, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultFileNameSize, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultLineNumber, 1, MPI_INT, 0, rank, comm); + MPI_Send(&resultMessageSize, 1, MPI_INT, 0, rank, comm); + MPI_Send(resultFileName.c_str(), resultFileNameSize, MPI_CHAR, + 0, rank, comm); + MPI_Send(resultMessage.c_str(), resultMessageSize, MPI_CHAR, 0, + rank, comm); + } + } else { + // Rank 0 first prints its local result data + for (int i = 0; i < localResultCount; i++) { + const ::testing::TestPartResult test_part_result = + result_vector.at(i); + if (test_part_result.failed()) { + std::string message(test_part_result.message()); + std::istringstream input_stream(message); + std::stringstream to_stream_into_failure; + std::string line_as_string; + while (std::getline(input_stream, line_as_string)) { + to_stream_into_failure << "[Rank 0/" << size << "] " + << line_as_string << std::endl; + } + + ADD_FAILURE_AT(test_part_result.file_name(), + test_part_result.line_number()) + << to_stream_into_failure.str(); + } + } + + for (int r = 1; r < size; r++) { + for (int i = 0; i < resultCountOnRank[r]; i++) { + int resultStatus, resultFileNameSize, resultLineNumber; + int resultMessageSize; + MPI_Recv(&resultStatus, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultFileNameSize, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultLineNumber, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + MPI_Recv(&resultMessageSize, 1, MPI_INT, r, r, comm, + MPI_STATUS_IGNORE); + + std::vector fileNameBuffer(resultFileNameSize); + std::vector messageBuffer(resultMessageSize); + MPI_Recv(&fileNameBuffer[0], resultFileNameSize, MPI_CHAR, + r, r, comm, MPI_STATUS_IGNORE); + MPI_Recv(&messageBuffer[0], resultMessageSize, MPI_CHAR, r, + r, comm, MPI_STATUS_IGNORE); + + std::string resultFileName(fileNameBuffer.begin(), + fileNameBuffer.end()); + std::string resultMessage(messageBuffer.begin(), + messageBuffer.end()); + + bool testPartHasFailed = (resultStatus == 1); + if (testPartHasFailed) { + std::string message(resultMessage); + std::istringstream input_stream(message); + std::stringstream to_stream_into_failure; + std::string line_as_string; + + while (std::getline(input_stream, line_as_string)) { + to_stream_into_failure + << "[Rank " << r << "/" << size << "] " + << line_as_string << std::endl; + } + + ADD_FAILURE_AT(resultFileName.c_str(), resultLineNumber) + << to_stream_into_failure.str(); + } + } + } + } + + result_vector.clear(); + if (rank == 0) { + listener->OnTestEnd(test_info); + } + } + +#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_ + virtual void OnTestCaseEnd(const ::testing::TestCase& test_case) + { + if (rank == 0) { + listener->OnTestCaseEnd(test_case); + } + } + +#endif + + // Called 
before the Environment is torn down. + virtual void OnEnvironmentsTearDownStart( + const ::testing::UnitTest& unit_test) + { + int is_mpi_finalized; + ASSERT_EQ(MPI_Finalized(&is_mpi_finalized), MPI_SUCCESS); + if (!is_mpi_finalized) { + MPI_Comm_free(&comm); + } + if (rank == 0) { + listener->OnEnvironmentsTearDownStart(unit_test); + } + } + + virtual void OnEnvironmentsTearDownEnd(const ::testing::UnitTest& unit_test) + { + if (rank == 0) { + listener->OnEnvironmentsTearDownEnd(unit_test); + } + } + + virtual void OnTestIterationEnd(const ::testing::UnitTest& unit_test, + int iteration) + { + if (rank == 0) { + listener->OnTestIterationEnd(unit_test, iteration); + } + } + + // Called when test driver program ends + virtual void OnTestProgramEnd(const ::testing::UnitTest& unit_test) + { + if (rank == 0) { + listener->OnTestProgramEnd(unit_test); + } + } + +private: + // Use a pointer here instead of a reference because + // ::testing::TestEventListeners::Release returns a pointer + // (namely, one of type ::testing::TesteEventListener*). + ::testing::TestEventListener* listener; + MPI_Comm comm; + int rank; + int size; + std::vector<::testing::TestPartResult> result_vector; + + int UpdateCommState() + { + int flag = MPI_Comm_rank(comm, &rank); + if (flag != MPI_SUCCESS) { + return flag; + } + flag = MPI_Comm_size(comm, &size); + return flag; + } +}; + +} // namespace GTestMPIListener + +#endif /* GTEST_MPI_MINIMAL_LISTENER_H */ diff --git a/mpi/test/gtest-mpi-main.hpp b/mpi/test/gtest-mpi-main.hpp new file mode 100644 index 00000000000..56edbdf283b --- /dev/null +++ b/mpi/test/gtest-mpi-main.hpp @@ -0,0 +1,52 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +#define GKO_DECLARE_GTEST_MPI_MAIN \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + MPI_Init(&argc, &argv); \ + ::testing::AddGlobalTestEnvironment( \ + new GTestMPIListener::MPIEnvironment); \ + ::testing::TestEventListeners& listeners = \ + ::testing::UnitTest::GetInstance()->listeners(); \ + ::testing::TestEventListener* l = \ + listeners.Release(listeners.default_result_printer()); \ + listeners.Append( \ + new GTestMPIListener::MPIWrapperPrinter(l, MPI_COMM_WORLD)); \ + int result = RUN_ALL_TESTS(); \ + return 0; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") From 7ba1c81960db943740eecfff6a3c756f863d5b1a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 14:07:25 +0200 Subject: [PATCH 09/59] Add a few MPI jobs for CI --- .gitlab-ci.yml | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 29edb8cc444..1cc8683bbd8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -30,6 +30,7 @@ include: BUILD_CUDA: "OFF" BUILD_HIP: "OFF" BUILD_HWLOC: "ON" + BUILD_MPI: "OFF" FAST_TESTS: "OFF" DPCPP_SINGLE_MODE: "OFF" MIXED_PRECISION: "ON" @@ -74,6 +75,8 @@ include: -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} + -DMPI_RUN_AS_ROOT=${MPI_RUN_AS_ROOT} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} @@ -109,6 +112,8 @@ include: -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} + -DMPI_RUN_AS_ROOT=${MPI_RUN_AS_ROOT} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} @@ -553,6 +558,46 @@ build/amd/clang/hip_wo_omp/release/shared: BUILD_HIP: "ON" BUILD_TYPE: "Release" +# mpi job test debug shared +build/nocuda/gcc/mpi/debug/shared: + <<: *default_build_with_test + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8-intel + variables: + <<: *default_variables + BUILD_MPI: "ON" + BUILD_TYPE: "Debug" + BUILD_SHARED_LIBS: "ON" + +# mpi job test release static +build/nocuda/gcc/mpi/release/static: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8-intel + variables: + <<: *default_variables + BUILD_MPI: "ON" + BUILD_TYPE: "Release" + BUILD_SHARED_LIBS: "OFF" + +# mpi job with cuda 10.0 +build/cuda100/mpi/gcc/all/debug/shared: + <<: *default_build + extends: + - .quick_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 + variables: + <<: *default_variables + BUILD_OMP: "ON" + BUILD_CUDA: "ON" + BUILD_MPI: "ON" + BUILD_HIP: "ON" + BUILD_TYPE: "Debug" + FAST_TESTS: "ON" + CUDA_ARCH: 61 + # no cuda but latest gcc and clang build/nocuda/gcc/core/debug/static: <<: *default_build_with_test From 4e869f6e25bc9e88a82e110329978d29d0e2b591 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 22 Oct 2021 17:18:30 +0200 Subject: [PATCH 10/59] Add docs, and some tests. 
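
The scan() interface gains an optional request handle; when one is passed, the
binding dispatches to the non-blocking MPI_Iscan instead of MPI_Scan. A minimal
usage sketch of the blocking variant (the values are illustrative):

    double local = 3.0;
    double partial = 0.0;
    auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD);
    // inclusive prefix sum: rank r receives the sum over ranks 0..r
    gko::mpi::scan(&local, &partial, 1, gko::mpi::op_type::sum, comm);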
--- core/device_hooks/mpi_hooks.cpp | 4 +- include/ginkgo/core/base/mpi.hpp | 226 ++++++++++++++++++++++++++++++- include/ginkgo/ginkgo.hpp | 1 + mpi/base/bindings.cpp | 17 ++- mpi/base/bindings.hpp | 9 ++ mpi/test/base/bindings.cpp | 39 ++++++ 6 files changed, 288 insertions(+), 8 deletions(-) diff --git a/core/device_hooks/mpi_hooks.cpp b/core/device_hooks/mpi_hooks.cpp index cc9ad9566c8..da79eaedcf0 100644 --- a/core/device_hooks/mpi_hooks.cpp +++ b/core/device_hooks/mpi_hooks.cpp @@ -254,8 +254,8 @@ void scatter(const SendType* send_buffer, const int* send_counts, template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm) - GKO_NOT_COMPILED(mpi); + op_type op_enum, std::shared_ptr comm, + std::shared_ptr req) GKO_NOT_COMPILED(mpi); template diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 4df9ed9dd13..af23e092bf7 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -104,6 +104,10 @@ using array_manager = std::unique_ptr>; namespace gko { namespace mpi { +/* + * This enum is used for selecting the operation type for functions that take + * MPI_Op. For example the MPI_Reduce operations. + */ enum class op_type { sum = 1, min = 2, @@ -277,6 +281,9 @@ class communicator : public EnableSharedCreateMethod { }; +/** + * A type helper which can be used to create MPI_Datatype from other types. + */ class mpi_type { public: mpi_type(const int count, MPI_Datatype& old); @@ -288,6 +295,14 @@ class mpi_type { }; +/** + * This class wraps the MPI_Window class with RAII functionality. Different + * create and lock type methods are setup with enums. + * + * MPI_Window is primarily used for one sided communication and this class + * provides functionalities to fence, lock, unlock and flush the communication + * buffers. + */ template class window { public: @@ -333,24 +348,61 @@ class window { }; +/** + * This function is used to synchronize between the ranks of a given + * communicator. + * + * @param comm the communicator + */ void synchronize(const communicator& comm = communicator::get_comm_world()); +/** + * Allows a rank to wait on a particular request handle. + * + * @param req The request to wait on. + * @param status The status variable that can be queried. + */ void wait(std::shared_ptr req, std::shared_ptr status = {}); double get_walltime(); +/** + * Get the rank in the communicator of the calling process. + * + * @param comm the communicator + */ int get_my_rank(const communicator& comm = communicator::get_comm_world()); +/** + * Get the node local rank in the communicator of the calling process. + * + * @param comm the communicator + */ int get_local_rank(const communicator& comm = communicator::get_comm_world()); +/** + * Get the number of ranks in the communicator of the calling process. + * + * @param comm the communicator + */ int get_num_ranks(const communicator& comm = communicator::get_comm_world()); +/** + * Send data from calling process to destination rank. 
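+ *
+ * A point-to-point sketch (buffers, ranks and tag are illustrative; the
+ * bindings fall back to the blocking MPI call when no request handle is
+ * given):
+ *
+ *     if (my_rank == 0) {
+ *         gko::mpi::send(data, count, 1, 0);  // send to rank 1 with tag 0
+ *     } else if (my_rank == 1) {
+ *         gko::mpi::recv(data, count, 0, 0);  // receive from rank 0, tag 0
+ *     }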
+ *
+ * @param send_buffer the buffer to send from
+ * @param send_count the number of elements to send
+ * @param destination_rank the rank to send the data to
+ * @param send_tag the tag for the send call
+ * @param req the request handle for the send call
+ * @param comm the communicator
+ */
template <typename SendType>
void send(const SendType* send_buffer, const int send_count,
          const int destination_rank, const int send_tag,
          std::shared_ptr comm = {});

+/**
+ * Receive data from the source rank.
+ *
+ * @param recv_buffer the buffer to receive into
+ * @param recv_count the number of elements to receive
+ * @param source_rank the rank to receive the data from
+ * @param recv_tag the tag for the receive call
+ * @param req the request handle for the receive call
+ * @param comm the communicator
+ */
template <typename RecvType>
void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
          const int recv_tag, std::shared_ptr req = {},
          std::shared_ptr comm = {});

+/**
+ * Put data into the target window.
+ *
+ * @param origin_buffer the buffer to put the data from
+ * @param origin_count the number of elements to put
+ * @param target_rank the rank to put the data to
+ * @param target_disp the displacement at the target window
+ * @param target_count the number of elements to put at the target
+ * @param window the window to put the data into
+ * @param req the request handle
+ */
template <typename PutType>
void put(const PutType* origin_buffer, const int origin_count,
         const int target_rank, const unsigned int target_disp,
         std::shared_ptr req = {});

+/**
+ * Get data from the target window.
+ *
+ * @param origin_buffer the buffer to get the data into
+ * @param origin_count the number of elements to get
+ * @param target_rank the rank to get the data from
+ * @param target_disp the displacement at the target window
+ * @param target_count the number of elements to get from the target
+ * @param window the window to get the data from
+ * @param req the request handle
+ */
template <typename GetType>
void get(GetType* origin_buffer, const int origin_count, const int target_rank,
         const unsigned int target_disp, const int target_count,
         window<GetType>& window, std::shared_ptr req = {});

+/**
+ * Broadcast data from the calling process to all ranks in the communicator.
+ *
+ * @param buffer the buffer to broadcast
+ * @param count the number of elements to broadcast
+ * @param root_rank the rank to broadcast from
+ * @param comm the communicator
+ */
template <typename BroadcastType>
void broadcast(BroadcastType* buffer, int count, int root_rank,
               std::shared_ptr comm = {});

+/**
+ * Reduce data onto the root rank from all calling processes in the same
+ * communicator.
+ *
+ * @param send_buffer the buffer to reduce
+ * @param recv_buffer the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param root_rank the rank the result is reduced onto
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename ReduceType>
void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
            op_type op_enum, int root_rank,
            std::shared_ptr comm = {},
            std::shared_ptr req = {});

+/**
+ * Reduce data from all calling processes on the same communicator, in place:
+ * every rank receives the reduced result.
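+ *
+ * A usage sketch of this in-place variant (the variable names are
+ * illustrative):
+ *
+ *     double value = local_contribution;
+ *     gko::mpi::all_reduce(&value, 1);  // op_type::sum is the default
+ *     // value now holds the sum over all ranks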
+ *
+ * @param recv_buffer the data to reduce and the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename ReduceType>
void all_reduce(ReduceType* recv_buffer, int count,
                op_type op_enum = op_type::sum,
                std::shared_ptr comm = {},
                std::shared_ptr req = {});

+/**
+ * Reduce data from all calling processes on the same communicator; the
+ * reduced result is available on every rank.
+ *
+ * @param send_buffer the data to reduce
+ * @param recv_buffer the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename ReduceType>
void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
                int count, op_type op_enum = op_type::sum,
                std::shared_ptr comm = {},
                std::shared_ptr req = {});

+/**
+ * Gather data onto the root rank from all ranks in the communicator.
+ *
+ * @param send_buffer the buffer to gather from
+ * @param send_count the number of elements to send
+ * @param recv_buffer the buffer to gather into
+ * @param recv_count the number of elements to receive per rank
+ * @param root_rank the rank to gather onto
+ * @param comm the communicator
+ */
template <typename SendType, typename RecvType>
void gather(const SendType* send_buffer, const int send_count,
            RecvType* recv_buffer, const int recv_count, int root_rank,
            std::shared_ptr comm = {});

+/**
+ * Gather data onto the root rank from all ranks in the communicator with
+ * offsets.
+ *
+ * @param send_buffer the buffer to gather from
+ * @param send_count the number of elements to send
+ * @param recv_buffer the buffer to gather into
+ * @param recv_counts the number of elements to receive from each rank
+ * @param displacements the offsets into the receive buffer
+ * @param root_rank the rank to gather onto
+ * @param comm the communicator
+ */
template <typename SendType, typename RecvType>
void gather(const SendType* send_buffer, const int send_count,
            RecvType* recv_buffer, const int* recv_counts,
            const int* displacements, int root_rank,
            std::shared_ptr comm = {});

+/**
+ * Gather data onto all ranks from all ranks in the communicator.
+ *
+ * @param send_buffer the buffer to gather from
+ * @param send_count the number of elements to send
+ * @param recv_buffer the buffer to gather into
+ * @param recv_count the number of elements to receive per rank
+ * @param comm the communicator
+ */
template <typename SendType, typename RecvType>
void all_gather(const SendType* send_buffer, const int send_count,
                RecvType* recv_buffer, const int recv_count,
                std::shared_ptr comm = {});

+/**
+ * Scatter data from the root rank to all ranks in the communicator.
+ *
+ * @param send_buffer the buffer to scatter from
+ * @param send_count the number of elements to send to each rank
+ * @param recv_buffer the buffer to scatter into
+ * @param recv_count the number of elements to receive
+ * @param root_rank the rank to scatter from
+ * @param comm the communicator
+ */
template <typename SendType, typename RecvType>
void scatter(const SendType* send_buffer, const int send_count,
             RecvType* recv_buffer, const int recv_count, int root_rank,
             std::shared_ptr comm = {});

+/**
+ * Scatter data from the root rank to all ranks in the communicator with
+ * offsets.
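+ *
+ * A sketch (the root rank splits its buffer into variable-sized chunks;
+ * counts, displacements and the root rank 0 are illustrative):
+ *
+ *     // on the root: send_counts = {2, 1, 3, 4}, displacements = {0, 2, 3, 6}
+ *     gko::mpi::scatter(send_buffer, send_counts, displacements,
+ *                       recv_buffer, recv_count, 0);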
+ *
+ * @param send_buffer the buffer to scatter from
+ * @param send_counts the number of elements to send to each rank
+ * @param displacements the offsets into the send buffer
+ * @param recv_buffer the buffer to scatter into
+ * @param recv_count the number of elements to receive
+ * @param comm the communicator
+ */
template <typename SendType, typename RecvType>
void scatter(const SendType* send_buffer, const int* send_counts,
             const int* displacements, RecvType* recv_buffer,
             std::shared_ptr comm = {});

+/**
+ * Communicate data from all ranks to all other ranks in place (MPI_Alltoall).
+ * See the MPI documentation for more details.
+ *
+ * @param buffer the buffer to send from and to receive into
+ * @param recv_count the number of elements to receive from each rank
+ * @param comm the communicator
+ * @param req the request handle
+ *
+ * @note This overload uses MPI_IN_PLACE and the source and destination buffers
+ *       are the same.
+ */
template <typename RecvType>
-void all_to_all(RecvType* recv_buffer, const int recv_count,
+void all_to_all(RecvType* buffer, const int recv_count,
                std::shared_ptr comm = {},
                std::shared_ptr req = {});

+/**
+ * Communicate data from all ranks to all other ranks (MPI_Alltoall).
+ * See the MPI documentation for more details.
+ *
+ * @param send_buffer the buffer to send
+ * @param send_count the number of elements to send to each rank
+ * @param recv_buffer the buffer to receive into
+ * @param recv_count the number of elements to receive from each rank
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename SendType, typename RecvType>
void all_to_all(const SendType* send_buffer, const int send_count,
                RecvType* recv_buffer, const int recv_count = {},
                std::shared_ptr comm = {},
                std::shared_ptr req = {});

+/**
+ * Communicate data from all ranks to all other ranks with
+ * offsets (MPI_Alltoallv). See the MPI documentation for more details.
+ *
+ * @param send_buffer the buffer to send
+ * @param send_counts the number of elements to send to each rank
+ * @param send_offsets the offsets for the send buffer
+ * @param recv_buffer the buffer to receive into
+ * @param recv_counts the number of elements to receive from each rank
+ * @param recv_offsets the offsets for the receive buffer
+ * @param stride the stride to be used in case of sending concatenated data
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename SendType, typename RecvType>
void all_to_all(const SendType* send_buffer, const int* send_counts,
                const int* send_offsets, RecvType* recv_buffer,
                std::shared_ptr comm = {},
                std::shared_ptr req = {});

+/**
+ * Performs a scan (prefix reduction) across all ranks with the given
+ * operator (MPI_Scan). See the MPI documentation for more details.
+ *
+ * @param send_buffer the buffer to scan from
+ * @param recv_buffer the result buffer
+ * @param count the number of elements to scan
+ * @param op_enum the operation type to be used for the scan. See @op_type
+ * @param comm the communicator
+ * @param req the request handle
+ */
template <typename ReduceType>
void scan(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
          op_type op_enum = op_type::sum,
-         std::shared_ptr comm = {});
+         std::shared_ptr comm = {},
+         std::shared_ptr req = {});


}  // namespace mpi
diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp
index 9834a630144..b4014ff6ccf 100644
--- a/include/ginkgo/ginkgo.hpp
+++ b/include/ginkgo/ginkgo.hpp
@@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include #include #include +#include #include #include #include diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp index 8832188e07a..9e89b98bda1 100644 --- a/mpi/base/bindings.cpp +++ b/mpi/base/bindings.cpp @@ -630,12 +630,19 @@ void all_to_all(const SendType* send_buffer, const int* send_counts, template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm) + op_type op_enum, std::shared_ptr comm, + std::shared_ptr req) { auto operation = helpers::get_operation(op_enum); auto scan_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::scan(send_buffer, recv_buffer, count, scan_type, operation, - comm ? comm->get() : communicator::get_comm_world()); + if (!req.get()) { + bindings::scan(send_buffer, recv_buffer, count, scan_type, operation, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_scan(send_buffer, recv_buffer, count, scan_type, operation, + comm ? comm->get() : communicator::get_comm_world(), + req->get_requests()); + } } @@ -786,7 +793,9 @@ GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE( #define GKO_DECLARE_SCAN(ScanType) \ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ - op_type op_enum, std::shared_ptr comm) + op_type op_enum, std::shared_ptr comm, \ + std::shared_ptr req) + GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp index 8f035081b3d..f3e2ddb97ef 100644 --- a/mpi/base/bindings.hpp +++ b/mpi/base/bindings.hpp @@ -508,6 +508,15 @@ inline void scan(const void* send_buffer, void* recv_buffer, int count, } +inline void i_scan(const void* send_buffer, void* recv_buffer, int count, + MPI_Datatype& reduce_type, MPI_Op operation, + const MPI_Comm& comm, MPI_Request* requests) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, + reduce_type, operation, comm, requests)); +} + + } // namespace bindings } // namespace mpi } // namespace gko diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp index 14ae9c3a598..63591e1d36f 100644 --- a/mpi/test/base/bindings.cpp +++ b/mpi/test/base/bindings.cpp @@ -748,6 +748,45 @@ TEST_F(MpiBindings, AllToAllVWorksCorrectly) } +TEST_F(MpiBindings, CanScanValues) +{ + using ValueType = double; + auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto my_rank = gko::mpi::get_my_rank(comm->get()); + auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + ValueType data, sum, max, min; + if (my_rank == 0) { + data = 3; + } else if (my_rank == 1) { + data = 5; + } else if (my_rank == 2) { + data = 2; + } else if (my_rank == 3) { + data = 6; + } + gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, 0); + gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, 0); + gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, 0); + if (my_rank == 0) { + EXPECT_EQ(sum, 3.0); + EXPECT_EQ(max, 3.0); + EXPECT_EQ(min, 3.0); + } else if (my_rank == 1) { + EXPECT_EQ(sum, 8.0); + EXPECT_EQ(max, 5.0); + EXPECT_EQ(min, 3.0); + } else if (my_rank == 2) { + EXPECT_EQ(sum, 10.0); + EXPECT_EQ(max, 5.0); + EXPECT_EQ(min, 2.0); + } else if (my_rank == 3) { + EXPECT_EQ(sum, 16.0); + EXPECT_EQ(max, 6.0); + EXPECT_EQ(min, 2.0); + } +} + + // Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for // more details. 
GKO_DECLARE_GTEST_MPI_MAIN; From da8f2c446f819236b301a77e9bf318652ff08d95 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 25 Oct 2021 17:26:42 +0200 Subject: [PATCH 11/59] Review update, more tests --- cmake/create_test.cmake | 2 +- core/CMakeLists.txt | 2 +- core/device_hooks/mpi_hooks.cpp | 3 ++- include/ginkgo/config.hpp.in | 6 ++++++ mpi/CMakeLists.txt | 2 +- mpi/base/exception.cpp | 12 +++++++----- 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index b5df8320e63..c0f2ecd260f 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -81,7 +81,7 @@ function(ginkgo_create_mpi_test test_name num_mpi_procs) set(OPENMPI_RUN_AS_ROOT_FLAG "") endif() target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ${MPI_C_LIBRARIES} ${MPI_CXX_LIBRARIES}) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE MPI::MPI_CXX) set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${MPIEXEC_EXECUTABLE} ${test_param} ) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index fe731282302..c2d2824d6c6 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -61,7 +61,7 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if (GINKGO_BUILD_MPI) - target_link_libraries(ginkgo PUBLIC "${MPI_C_LIBRARIES}" "${MPI_CXX_LIBRARIES}") + target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX) target_include_directories(ginkgo SYSTEM PUBLIC ${MPI_INCLUDE_PATH}) endif() diff --git a/core/device_hooks/mpi_hooks.cpp b/core/device_hooks/mpi_hooks.cpp index da79eaedcf0..32c492353c8 100644 --- a/core/device_hooks/mpi_hooks.cpp +++ b/core/device_hooks/mpi_hooks.cpp @@ -396,7 +396,8 @@ GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER2); #define GKO_DECLARE_SCAN(ScanType) \ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ - op_type op_enum, std::shared_ptr comm) + op_type op_enum, std::shared_ptr comm, \ + std::shared_ptr req) GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 1c6a31ea481..8436d8c3abb 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -82,6 +82,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // clang-format on +/* Is MPI available ? */ +// clang-format off +#define GKO_HAVE_MPI @GINKGO_HAVE_MPI@ +// clang-format on + + /* Is HWLOC available ? */ // clang-format off #define GKO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@ diff --git a/mpi/CMakeLists.txt b/mpi/CMakeLists.txt index b8a5f336598..c9f60e937ed 100644 --- a/mpi/CMakeLists.txt +++ b/mpi/CMakeLists.txt @@ -11,7 +11,7 @@ target_sources(ginkgo_mpi ginkgo_compile_features(ginkgo_mpi) target_include_directories(ginkgo_mpi SYSTEM PRIVATE ${MPI_INCLUDE_PATH}) -target_link_libraries(ginkgo_mpi PRIVATE "${MPI_C_LIBRARIES}" "${MPI_CXX_LIBRARIES}") +target_link_libraries(ginkgo_mpi PRIVATE MPI::MPI_CXX) target_compile_options(ginkgo_mpi PRIVATE "${GINKGO_COMPILER_FLAGS}") ginkgo_default_includes(ginkgo_mpi) diff --git a/mpi/base/exception.cpp b/mpi/base/exception.cpp index 1481486b3a5..358fa3dd0b4 100644 --- a/mpi/base/exception.cpp +++ b/mpi/base/exception.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include @@ -44,12 +45,13 @@ namespace gko { std::string MpiError::get_error(int64 error_code) { - int len = MPI_MAX_ERROR_STRING; - char* error_string = new char[len]; - MPI_Error_string(error_code, error_string, &len); - std::string message = "MPI Error: " + std::string(error_string); - delete[] error_string; + int len{}; + std::array error_buf; + MPI_Error_string(error_code, &error_buf[0], &len); + std::string message = "MPI Error: " + std::string(&error_buf[0], len); + return message; } + } // namespace gko From 13d1c5361dc4f2a27ed13b380cd44664fc9e11c9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 25 Oct 2021 17:27:01 +0200 Subject: [PATCH 12/59] Add an example as an integration test --- examples/CMakeLists.txt | 1 + examples/mpi-simple-solver/CMakeLists.txt | 17 ++ examples/mpi-simple-solver/build.sh | 16 ++ examples/mpi-simple-solver/data/A.mtx | 114 +++++++++++ examples/mpi-simple-solver/data/b.mtx | 21 ++ examples/mpi-simple-solver/data/x0.mtx | 21 ++ examples/mpi-simple-solver/doc/builds-on | 1 + examples/mpi-simple-solver/doc/intro.dox | 19 ++ examples/mpi-simple-solver/doc/kind | 1 + examples/mpi-simple-solver/doc/results.dox | 35 ++++ examples/mpi-simple-solver/doc/short-intro | 1 + examples/mpi-simple-solver/doc/tooltip | 1 + .../mpi-simple-solver/mpi-simple-solver.cpp | 179 ++++++++++++++++++ 13 files changed, 427 insertions(+) create mode 100644 examples/mpi-simple-solver/CMakeLists.txt create mode 100644 examples/mpi-simple-solver/build.sh create mode 100644 examples/mpi-simple-solver/data/A.mtx create mode 100644 examples/mpi-simple-solver/data/b.mtx create mode 100644 examples/mpi-simple-solver/data/x0.mtx create mode 100644 examples/mpi-simple-solver/doc/builds-on create mode 100644 examples/mpi-simple-solver/doc/intro.dox create mode 100644 examples/mpi-simple-solver/doc/kind create mode 100644 examples/mpi-simple-solver/doc/results.dox create mode 100644 examples/mpi-simple-solver/doc/short-intro create mode 100644 examples/mpi-simple-solver/doc/tooltip create mode 100644 examples/mpi-simple-solver/mpi-simple-solver.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 151430846e1..fc5562aaac5 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -23,6 +23,7 @@ set(EXAMPLES_LIST ginkgo-overhead minimal-cuda-solver mixed-spmv + mpi-simple-solver mixed-multigrid-solver multigrid-preconditioned-solver par-ilu-convergence diff --git a/examples/mpi-simple-solver/CMakeLists.txt b/examples/mpi-simple-solver/CMakeLists.txt new file mode 100644 index 00000000000..7f146a1972c --- /dev/null +++ b/examples/mpi-simple-solver/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.9) +project(mpi-simple-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.5.0 REQUIRED) + find_package(MPI REQUIRED) +endif() + +add_executable(mpi-simple-solver mpi-simple-solver.cpp) +target_link_libraries(mpi-simple-solver Ginkgo::ginkgo) +target_link_libraries(mpi-simple-solver MPI::MPI_CXX) + +# Copy the data files to the execution directory +configure_file(data/A.mtx data/A.mtx COPYONLY) +configure_file(data/b.mtx data/b.mtx COPYONLY) +configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/mpi-simple-solver/build.sh b/examples/mpi-simple-solver/build.sh new file mode 100644 index 00000000000..8b025d647b5 --- /dev/null +++ b/examples/mpi-simple-solver/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# set up script +if [ $# -ne 1 ]; then + 
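+    # no build directory was given: print usage and bail out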
echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY" + exit 1 +fi +BUILD_DIR=$1 +THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) + +source ${THIS_DIR}/../build-setup.sh + +# build +${CXX} -std=c++14 -o ${THIS_DIR}/mpi-simple-solver ${THIS_DIR}/mpi-simple-solver.cpp \ + -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \ + -L${THIS_DIR} ${LINK_FLAGS} diff --git a/examples/mpi-simple-solver/data/A.mtx b/examples/mpi-simple-solver/data/A.mtx new file mode 100644 index 00000000000..c67437da567 --- /dev/null +++ b/examples/mpi-simple-solver/data/A.mtx @@ -0,0 +1,114 @@ +%%MatrixMarket matrix coordinate integer symmetric +%------------------------------------------------------------------------------- +% UF Sparse Matrix Collection, Tim Davis +% http://www.cise.ufl.edu/research/sparse/matrices/JGD_Trefethen/Trefethen_20b +% name: JGD_Trefethen/Trefethen_20b +% [Diagonal matrices with primes, Nick Trefethen, Oxford Univ.] +% id: 2203 +% date: 2008 +% author: N. Trefethen +% ed: J.-G. Dumas +% fields: name title A id date author ed kind notes +% kind: combinatorial problem +%------------------------------------------------------------------------------- +% notes: +% Diagonal matrices with primes, Nick Trefethen, Oxford Univ. +% From Jean-Guillaume Dumas' Sparse Integer Matrix Collection, +% http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/simc.html +% +% Problem 7 of the Hundred-dollar, Hundred-digit Challenge Problems, +% SIAM News, vol 35, no. 1. +% +% 7. Let A be the 20,000 x 20,000 matrix whose entries are zero +% everywhere except for the primes 2, 3, 5, 7, . . . , 224737 along the +% main diagonal and the number 1 in all the positions A(i,j) with +% |i-j| = 1,2,4,8, . . . ,16384. What is the (1,1) entry of inv(A)? +% +% http://www.siam.org/news/news.php?id=388 +% +% Filename in JGD collection: Trefethen/trefethen_20__19_minor.sms +%------------------------------------------------------------------------------- +19 19 83 +1 1 3 +2 1 1 +3 1 1 +5 1 1 +9 1 1 +17 1 1 +2 2 5 +3 2 1 +4 2 1 +6 2 1 +10 2 1 +18 2 1 +3 3 7 +4 3 1 +5 3 1 +7 3 1 +11 3 1 +19 3 1 +4 4 11 +5 4 1 +6 4 1 +8 4 1 +12 4 1 +5 5 13 +6 5 1 +7 5 1 +9 5 1 +13 5 1 +6 6 17 +7 6 1 +8 6 1 +10 6 1 +14 6 1 +7 7 19 +8 7 1 +9 7 1 +11 7 1 +15 7 1 +8 8 23 +9 8 1 +10 8 1 +12 8 1 +16 8 1 +9 9 29 +10 9 1 +11 9 1 +13 9 1 +17 9 1 +10 10 31 +11 10 1 +12 10 1 +14 10 1 +18 10 1 +11 11 37 +12 11 1 +13 11 1 +15 11 1 +19 11 1 +12 12 41 +13 12 1 +14 12 1 +16 12 1 +13 13 43 +14 13 1 +15 13 1 +17 13 1 +14 14 47 +15 14 1 +16 14 1 +18 14 1 +15 15 53 +16 15 1 +17 15 1 +19 15 1 +16 16 59 +17 16 1 +18 16 1 +17 17 61 +18 17 1 +19 17 1 +18 18 67 +19 18 1 +19 19 71 diff --git a/examples/mpi-simple-solver/data/b.mtx b/examples/mpi-simple-solver/data/b.mtx new file mode 100644 index 00000000000..05d92ecc6f7 --- /dev/null +++ b/examples/mpi-simple-solver/data/b.mtx @@ -0,0 +1,21 @@ +%%MatrixMarket matrix array real general +19 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mpi-simple-solver/data/x0.mtx b/examples/mpi-simple-solver/data/x0.mtx new file mode 100644 index 00000000000..91d470cdbcd --- /dev/null +++ b/examples/mpi-simple-solver/data/x0.mtx @@ -0,0 +1,21 @@ +%%MatrixMarket matrix array real general +19 1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/examples/mpi-simple-solver/doc/builds-on b/examples/mpi-simple-solver/doc/builds-on new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/examples/mpi-simple-solver/doc/builds-on @@ -0,0 +1 @@ + diff --git 
a/examples/mpi-simple-solver/doc/intro.dox b/examples/mpi-simple-solver/doc/intro.dox new file mode 100644 index 00000000000..70bc1ce3cc7 --- /dev/null +++ b/examples/mpi-simple-solver/doc/intro.dox @@ -0,0 +1,19 @@ + +

Introduction

+This simple solver example should help you get started with Ginkgo. It is meant to show you
+how Ginkgo works and how to solve a simple linear system with it. We encourage you to play with
+the code, change the parameters and see what is best suited for your purposes; a small sketch of
+such a change follows.
+
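As one concrete instance of "changing the parameters": the solver's stopping criteria are set
through the factory call shown in the commented program below. A sketch of a variation with a
higher iteration limit and a tighter residual reduction (type aliases and the exec variable as
defined in the program) could read:

@code{.cpp}
auto solver_gen =
    cg::build()
        .with_criteria(
            gko::stop::Iteration::build().with_max_iters(100u).on(exec),
            gko::stop::ResidualNorm<ValueType>::build()
                .with_reduction_factor(1e-10)
                .on(exec))
        .on(exec);
@endcode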

About the example

+Each example has the following sections: +
    +
+  1. Introduction: This gives an overview of the example and mentions
+  any interesting aspects of the example that might help the reader.
+  2. The commented program: This section is intended for you to
+  understand the details of the example so that you can play with it and understand
+  Ginkgo and its features better.
+  3. Results: This section shows the results of the code when run. Though the
+  results may not be exactly the same, you can expect the behaviour to be similar.
+  4. The plain program: This is the complete code without any comments, giving
+  a complete overview of the code.
+
diff --git a/examples/mpi-simple-solver/doc/kind b/examples/mpi-simple-solver/doc/kind new file mode 100644 index 00000000000..15a13db4511 --- /dev/null +++ b/examples/mpi-simple-solver/doc/kind @@ -0,0 +1 @@ +basic diff --git a/examples/mpi-simple-solver/doc/results.dox b/examples/mpi-simple-solver/doc/results.dox new file mode 100644 index 00000000000..69a4ef8211c --- /dev/null +++ b/examples/mpi-simple-solver/doc/results.dox @@ -0,0 +1,35 @@ +

Results

+The following is the expected result: + +@code{.cpp} + +Solution (x): +%%MatrixMarket matrix array real general +19 1 +0.252218 +0.108645 +0.0662811 +0.0630433 +0.0384088 +0.0396536 +0.0402648 +0.0338935 +0.0193098 +0.0234653 +0.0211499 +0.0196413 +0.0199151 +0.0181674 +0.0162722 +0.0150714 +0.0107016 +0.0121141 +0.0123025 +Residual norm sqrt(r^T r): +%%MatrixMarket matrix array real general +1 1 +2.10788e-15 + +@endcode + +
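The residual norm shown above is computed by the example program itself. For reference, the
relevant lines (with A, x, b and the scalar vectors one, neg_one, res as defined in the plain
program) are:

@code{.cpp}
// Overwrite b with the residual A*x - b; its Euclidean norm equals ||b - A*x||.
A->apply(lend(one), lend(x), lend(neg_one), lend(b));
b->compute_norm2(lend(res));
@endcode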

Comments about programming and debugging

diff --git a/examples/mpi-simple-solver/doc/short-intro b/examples/mpi-simple-solver/doc/short-intro
new file mode 100644
index 00000000000..578df81366b
--- /dev/null
+++ b/examples/mpi-simple-solver/doc/short-intro
@@ -0,0 +1 @@
+The simple solver example.
diff --git a/examples/mpi-simple-solver/doc/tooltip b/examples/mpi-simple-solver/doc/tooltip
new file mode 100644
index 00000000000..52c4f6d5f44
--- /dev/null
+++ b/examples/mpi-simple-solver/doc/tooltip
@@ -0,0 +1 @@
+Solve a simple linear system with CG. Read a matrix and right hand side from a file.
diff --git a/examples/mpi-simple-solver/mpi-simple-solver.cpp b/examples/mpi-simple-solver/mpi-simple-solver.cpp
new file mode 100644
index 00000000000..f150c37a96c
--- /dev/null
+++ b/examples/mpi-simple-solver/mpi-simple-solver.cpp
@@ -0,0 +1,179 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+// @sect3{Include files}
+
+// This is the main ginkgo header file.
+#include <ginkgo/ginkgo.hpp>
+
+// Add the fstream header to read data from files.
+#include <fstream>
+// Add the C++ iostream header to output information to the console.
+#include <iostream>
+// Add the STL map header for the executor selection
+#include <map>
+// Add the string manipulation header to handle strings.
+#include <string>
+
+
+int main(int argc, char* argv[])
+{
+    auto mpi_init_fin = gko::mpi::init_finalize(argc, argv);
+    {
+        auto mpi_init_fin2 = gko::mpi::init_finalize(argc, argv);
+
+        // Use some shortcuts. In Ginkgo, vectors are seen as a
+        // gko::matrix::Dense with one column/one row. The advantage of this
+        // concept is that using multiple vectors is now a natural extension
+        // of adding columns/rows as necessary.
+        using ValueType = double;
+        using RealValueType = gko::remove_complex<ValueType>;
+        using IndexType = int;
+        using vec = gko::matrix::Dense<ValueType>;
+        using real_vec = gko::matrix::Dense<RealValueType>;
+        // The gko::matrix::Csr class is used here, but any other matrix class
+        // such as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Ell or
+        // gko::matrix::Sellp could also be used.
+        using mtx = gko::matrix::Csr<ValueType, IndexType>;
+        // The gko::solver::Cg is used here, but any other solver class can
+        // also be used.
+        using cg = gko::solver::Cg<ValueType>;
+
+        // Print the ginkgo version information.
+        std::cout << gko::version_info::get() << std::endl;
+
+        if (argc == 2 && (std::string(argv[1]) == "--help")) {
+            std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl;
+            std::exit(-1);
+        }
+
+        // @sect3{Where do you want to run your solver?}
+        // The gko::Executor class is one of the cornerstones of Ginkgo.
+        // Currently, we have support for a gko::OmpExecutor, which uses OpenMP
+        // multi-threading in most of its kernels, a gko::ReferenceExecutor, a
+        // single-threaded specialization of the OpenMP executor, and a
+        // gko::CudaExecutor which runs the code on an NVIDIA GPU if available.
+        // @note With the help of C++, you see that you only ever need to change
+        // the executor and all the other functions/routines within Ginkgo
+        // should automatically work and run on the executor without any other
+        // changes.
+        const auto executor_string = argc >= 2 ? argv[1] : "reference";
+        std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
+            exec_map{
+                {"omp", [] { return gko::OmpExecutor::create(); }},
+                {"cuda",
+                 [] {
+                     return gko::CudaExecutor::create(
+                         0, gko::OmpExecutor::create(), true);
+                 }},
+                {"hip",
+                 [] {
+                     return gko::HipExecutor::create(
+                         0, gko::OmpExecutor::create(), true);
+                 }},
+                {"dpcpp",
+                 [] {
+                     return gko::DpcppExecutor::create(
+                         0, gko::OmpExecutor::create());
+                 }},
+                {"reference", [] { return gko::ReferenceExecutor::create(); }}};
+
+        // executor where Ginkgo will perform the computation
+        const auto exec =
+            exec_map.at(executor_string)();  // throws if not valid
+
+        // @sect3{Reading your data and transferring it to the proper device.}
+        // Read the matrix, right hand side and the initial solution using the
+        // @ref read function.
+        // @note Ginkgo uses C++ smart pointers to automatically manage memory.
+        // To this end, we use our own object ownership transfer functions that
+        // under the hood call the required smart pointer functions to manage
+        // object ownership. The gko::share, gko::give and gko::lend are the
+        // functions that you would need to use.
+        auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
+        auto b = gko::read<vec>(std::ifstream("data/b.mtx"), exec);
+        auto x = gko::read<vec>(std::ifstream("data/x0.mtx"), exec);
+
+        // @sect3{Creating the solver}
+        // Generate the gko::solver factory. Ginkgo uses the concept of
+        // Factories to build solvers with certain properties. Observe the
+        // Fluent interface used here. Here a cg solver is generated with
+        // stopping criteria of at most 20 iterations and a residual norm
+        // reduction factor of 1e-7. You also observe that the stopping
+        // criteria (gko::stop) are also generated from factories using their
+        // build methods. You need to specify the executor on which each of
+        // the objects needs to be built.
+        const RealValueType reduction_factor{1e-7};
+        auto solver_gen =
+            cg::build()
+                .with_criteria(
+                    gko::stop::Iteration::build().with_max_iters(20u).on(exec),
+                    gko::stop::ResidualNorm<ValueType>::build()
+                        .with_reduction_factor(reduction_factor)
+                        .on(exec))
+                .on(exec);
+        // Generate the solver from the matrix. The solver factory built in the
+        // previous step takes a "matrix" (a gko::LinOp to be more general) as
+        // an input. In this case we provide it with a full matrix that we
+        // previously read, but as the solver only effectively uses the apply()
+        // method within the provided "matrix" object, you can create a
+        // gko::LinOp class with your own apply implementation to accomplish
+        // more tasks. We will see an example of how this can be done in the
+        // custom-matrix-format example.
+        auto solver = solver_gen->generate(A);
+
+        // Finally, solve the system. The solver, being a gko::LinOp, can be
+        // applied to a right hand side, b, to obtain the solution, x.
+        solver->apply(lend(b), lend(x));
+
+        // Print the solution to the command line.
+        std::cout << "Solution (x):\n";
+        write(std::cout, lend(x));
+
+        // To check whether your solution has actually converged, you can
+        // compute the residual. one and neg_one are objects that represent
+        // the numbers and allow for a uniform interface when computing on any
+        // device. To compute the residual, all you need to do is call the
+        // apply method, which in this case is an spmv (a sparse matrix-vector
+        // product). Finally, you compute the Euclidean 2-norm with the
+        // compute_norm2 function.
+        auto one = gko::initialize<vec>({1.0}, exec);
+        auto neg_one = gko::initialize<vec>({-1.0}, exec);
+        auto res = gko::initialize<real_vec>({0.0}, exec);
+        A->apply(lend(one), lend(x), lend(neg_one), lend(b));
+        b->compute_norm2(lend(res));
+
+        std::cout << "Residual norm sqrt(r^T r):\n";
+        write(std::cout, lend(res));
+    }
+    gko::mpi::synchronize();
+}
From 91aadbaae4a8edafcd89a8788a728f130f46a237 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Thu, 28 Oct 2021 14:15:04 +0200
Subject: [PATCH 13/59] Disable MPI compilation when not being used.
---
 core/CMakeLists.txt | 9 +-
 core/device_hooks/CMakeLists.txt | 9 -
 core/device_hooks/mpi_hooks.cpp | 435 ---------------------------
 devices/CMakeLists.txt | 4 +-
 examples/CMakeLists.txt | 5 +-
 include/ginkgo/core/base/mpi.hpp | 46 +--
 include/ginkgo/core/base/version.hpp | 12 +-
 mpi/CMakeLists.txt | 2 +-
 mpi/test/base/bindings.cpp | 2 +
 mpi/test/base/communicator.cpp | 1 +
 mpi/test/base/exception_helpers.cpp | 1 +
 11 files changed, 33 insertions(+), 493 deletions(-)
 delete mode 100644 core/device_hooks/mpi_hooks.cpp
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index c2d2824d6c6..17e6b7055ad 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -71,8 +71,13 @@ target_compile_options(ginkgo PRIVATE "${GINKGO_COMPILER_FLAGS}")
 # add a namespace alias so Ginkgo can always be included as Ginkgo::ginkgo
 # regardless of whether it is installed or added as a subdirectory
 add_library(Ginkgo::ginkgo ALIAS ginkgo)
-target_link_libraries(ginkgo
-    PUBLIC ginkgo_device ginkgo_omp ginkgo_mpi ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
+if(GINKGO_BUILD_MPI)
+    target_link_libraries(ginkgo
+        PUBLIC ginkgo_device ginkgo_omp ginkgo_mpi ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
+else()
+    target_link_libraries(ginkgo
+        PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
+endif()
 # The PAPI dependency needs to be exposed to the user.
set(GKO_RPATH_ADDITIONS "") if (GINKGO_HAVE_PAPI_SDE) diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index 1c658e73172..fcb370a81a0 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -41,15 +41,6 @@ if (NOT GINKGO_BUILD_OMP) ginkgo_install_library(ginkgo_omp) endif() -if(NOT GINKGO_BUILD_MPI) - add_library(ginkgo_mpi - $ - mpi_hooks.cpp) - ginkgo_compile_features(ginkgo_mpi) - ginkgo_default_includes(ginkgo_mpi) - ginkgo_install_library(ginkgo_mpi mpi) -endif() - if (NOT GINKGO_BUILD_REFERENCE) add_library(ginkgo_reference $ diff --git a/core/device_hooks/mpi_hooks.cpp b/core/device_hooks/mpi_hooks.cpp deleted file mode 100644 index 32c492353c8..00000000000 --- a/core/device_hooks/mpi_hooks.cpp +++ /dev/null @@ -1,435 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include -#include - - -namespace gko { - - -version version_info::get_mpi_version() noexcept -{ - // We just return 1.0.0 with a special "not compiled" tag in placeholder - // modules. 
- return {1, 0, 0, "not compiled"}; -} - - -std::string MpiError::get_error(int64) -{ - return "ginkgo MPI module is not compiled"; -} - - -namespace mpi { - - -bool init_finalize::is_finalized() GKO_NOT_COMPILED(mpi); - - -bool init_finalize::is_initialized() GKO_NOT_COMPILED(mpi); - - -init_finalize::init_finalize(int& argc, char**& argv, - const size_type num_threads) GKO_NOT_COMPILED(mpi); - - -init_finalize::~init_finalize() {} - - -communicator::communicator(const MPI_Comm& comm) GKO_NOT_COMPILED(mpi); - - -communicator::communicator() GKO_NOT_COMPILED(mpi); - - -communicator::communicator(const MPI_Comm& comm_in, int color, int key) - GKO_NOT_COMPILED(mpi); - - -communicator::~communicator() {} - -info::info() GKO_NOT_COMPILED(mpi); - -void info::add(std::string key, std::string value) GKO_NOT_COMPILED(mpi); - - -void info::remove(std::string key) GKO_NOT_COMPILED(mpi); - - -info::~info() {} - - -bool communicator::compare(const MPI_Comm& comm) const GKO_NOT_COMPILED(mpi); - - -template -window::window(ValueType* base, unsigned int size, - std::shared_ptr comm, - const int disp_unit, info input_info, - win_type create_type) GKO_NOT_COMPILED(mpi); - - -template -void window::fence(int assert) GKO_NOT_COMPILED(mpi); - - -template -void window::lock(int rank, int assert, lock_type lock_t) - GKO_NOT_COMPILED(mpi); - - -template -void window::unlock(int rank) GKO_NOT_COMPILED(mpi); - - -template -void window::lock_all(int assert) GKO_NOT_COMPILED(mpi); - - -template -void window::unlock_all() GKO_NOT_COMPILED(mpi); - - -template -void window::flush(int rank) GKO_NOT_COMPILED(mpi); - - -template -void window::flush_local(int rank) GKO_NOT_COMPILED(mpi); - - -template -void window::flush_all() GKO_NOT_COMPILED(mpi); - - -template -void window::flush_all_local() GKO_NOT_COMPILED(mpi); - - -template -window::~window() -{} - - -MPI_Op create_operation( - const std::function func, - void* arg1, void* arg2, int* len, MPI_Datatype* type) GKO_NOT_COMPILED(mpi); - - -double get_walltime() GKO_NOT_COMPILED(mpi); - - -int get_my_rank(const communicator& comm) GKO_NOT_COMPILED(mpi); - - -int get_local_rank(const communicator& comm) GKO_NOT_COMPILED(mpi); - - -int get_num_ranks(const communicator& comm) GKO_NOT_COMPILED(mpi); - - -void synchronize(const communicator& comm) GKO_NOT_COMPILED(mpi); - - -void wait(std::shared_ptr req, std::shared_ptr status) - GKO_NOT_COMPILED(mpi); - - -template -void send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - std::shared_ptr req, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, - const int recv_tag, std::shared_ptr req, - std::shared_ptr status, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void get(GetType* origin_buffer, const int origin_count, const int target_rank, - const unsigned int target_disp, const int target_count, - window& window, std::shared_ptr req) - GKO_NOT_COMPILED(mpi); - - -template -void broadcast(BroadcastType* buffer, int count, int root_rank, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - op_type op_enum, int root_rank, std::shared_ptr req, - std::shared_ptr comm) 
GKO_NOT_COMPILED(mpi); - - -template -void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, - std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, op_type op_enum, - std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void all_gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void scatter(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void scatter(const SendType* send_buffer, const int* send_counts, - const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank, - std::shared_ptr comm) GKO_NOT_COMPILED(mpi); - - -template -void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -template -void all_to_all(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const int stride, std::shared_ptr comm, - std::shared_ptr req) GKO_NOT_COMPILED(mpi); - - -#define GKO_DECLARE_WINDOW(ValueType) class window - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_WINDOW); - - -#define GKO_DECLARE_SEND(SendType) \ - void send(const SendType* send_buffer, const int send_count, \ - const int destination_rank, const int send_tag, \ - std::shared_ptr req, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SEND); - - -#define GKO_DECLARE_RECV(RecvType) \ - void recv(RecvType* recv_buffer, const int recv_count, \ - const int source_rank, const int recv_tag, \ - std::shared_ptr req, std::shared_ptr status, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_RECV); - - -#define GKO_DECLARE_PUT(PutType) \ - void put(const PutType* origin_buffer, const int origin_count, \ - const int target_rank, const unsigned int target_disp, \ - const int target_count, window& window, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_PUT); - - -#define GKO_DECLARE_GET(GetType) \ - void get(GetType* origin_buffer, const int origin_count, \ - const int target_rank, const unsigned int target_disp, \ - const int target_count, window& window, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_GET); - - -#define GKO_DECLARE_BCAST(BroadcastType) \ - void broadcast(BroadcastType* buffer, int count, int root_rank, \ - std::shared_ptr comm) - 
-GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_BCAST); - - -#define GKO_DECLARE_REDUCE(ReduceType) \ - void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ - int count, op_type operation, int root_rank, \ - std::shared_ptr req, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_REDUCE); - - -#define GKO_DECLARE_ALLREDUCE1(ReduceType) \ - void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE1); - -#define GKO_DECLARE_ALLREDUCE2(ReduceType) \ - void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ - int count, op_type operation, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE2); - - -#define GKO_DECLARE_GATHER1(SendType, RecvType) \ - void gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER1); - - -#define GKO_DECLARE_GATHER2(SendType, RecvType) \ - void gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int* recv_counts, \ - const int* displacements, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER2); - - -#define GKO_DECLARE_ALLGATHER(SendType, RecvType) \ - void all_gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALLGATHER); - - -#define GKO_DECLARE_SCATTER1(SendType, RecvType) \ - void scatter(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER1); - - -#define GKO_DECLARE_SCATTER2(SendType, RecvType) \ - void scatter(const SendType* send_buffer, const int* send_counts, \ - const int* displacements, RecvType* recv_buffer, \ - const int recv_count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER2); - - -#define GKO_DECLARE_SCAN(ScanType) \ - void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ - op_type op_enum, std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); - - -#define GKO_DECLARE_ALL_TO_ALL1(RecvType) \ - void all_to_all(RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALL_TO_ALL1); - - -#define GKO_DECLARE_ALL_TO_ALL2(SendType, RecvType) \ - void all_to_all(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALL_TO_ALL2); - - -#define GKO_DECLARE_ALL_TO_ALL_V(SendType, RecvType) \ - void all_to_all(const SendType* send_buffer, const int* send_counts, \ - const int* send_offsets, RecvType* recv_buffer, \ - const int* recv_counts, const int* recv_offsets, \ - const int stride, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ALL_TO_ALL_V); - - -} // namespace mpi -} // 
namespace gko diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index 0f86fabfef5..f87e5fc9144 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -23,5 +23,7 @@ add_subdirectory(cuda) add_subdirectory(dpcpp) add_subdirectory(hip) add_subdirectory(omp) -add_subdirectory(mpi) +if(GINKGO_BUILD_MPI) + add_subdirectory(mpi) +endif() add_subdirectory(reference) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fc5562aaac5..5d7feccf592 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -23,7 +23,6 @@ set(EXAMPLES_LIST ginkgo-overhead minimal-cuda-solver mixed-spmv - mpi-simple-solver mixed-multigrid-solver multigrid-preconditioned-solver par-ilu-convergence @@ -32,6 +31,10 @@ set(EXAMPLES_LIST schroedinger-splitting simple-solver-logging) +if(GINKGO_BUILD_MPI) + list(APPEND EXAMPLES_LIST mpi-simple-solver) +endif() + if(GINKGO_BUILD_CUDA AND GINKGO_BUILD_OMP) list(APPEND EXAMPLES_LIST custom-matrix-format) endif() diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index af23e092bf7..d31c74a7e90 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -52,49 +52,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if GKO_HAVE_MPI -#include -#endif - - -#ifndef MPI_VERSION - -using MPI_Comm = int; -using MPI_Status = int; -using MPI_Request = int; -using MPI_Datatype = int; -using MPI_Op = int; -using MPI_Win = int*; -using MPI_Info = int*; - -#ifndef MPI_COMM_WORLD -#define MPI_COMM_WORLD 0 -#endif -#ifndef MPI_COMM_SELF -#define MPI_COMM_SELF 0 -#endif -#ifndef MPI_COMM_NULL -#define MPI_COMM_NULL 0 -#endif -#ifndef MPI_WIN_NULL -#define MPI_WIN_NULL nullptr -#endif -#ifndef MPI_REQUEST_NULL -#define MPI_REQUEST_NULL 0 -#endif -#ifndef MPI_INFO_NULL -#define MPI_INFO_NULL nullptr -#endif -#ifndef MPI_MIN -#define MPI_MIN 0 -#endif -#ifndef MPI_MAX -#define MPI_MAX 0 -#endif -#ifndef MPI_SUM -#define MPI_SUM 0 -#endif -#endif +#include template @@ -690,4 +649,7 @@ void scan(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, } // namespace gko +#endif // GKO_HAVE_MPI + + #endif // GKO_PUBLIC_CORE_BASE_MPI_HPP_ diff --git a/include/ginkgo/core/base/version.hpp b/include/ginkgo/core/base/version.hpp index 6d1d5aa7510..6e58a2c9323 100644 --- a/include/ginkgo/core/base/version.hpp +++ b/include/ginkgo/core/base/version.hpp @@ -219,6 +219,8 @@ class version_info { */ version dpcpp_version; +#if GKO_HAVE_MPI + /** * Contains version information of the MPI module. 
* @@ -226,6 +228,8 @@ class version_info { */ version mpi_version; +#endif + private: static constexpr version get_header_version() noexcept { @@ -245,7 +249,9 @@ class version_info { static version get_dpcpp_version() noexcept; +#if GKO_HAVE_MPI static version get_mpi_version() noexcept; +#endif version_info() : header_version{get_header_version()}, @@ -254,8 +260,10 @@ class version_info { omp_version{get_omp_version()}, cuda_version{get_cuda_version()}, hip_version{get_hip_version()}, - dpcpp_version{get_dpcpp_version()}, - mpi_version{get_mpi_version()} +#if GKO_HAVE_MPI + mpi_version{get_mpi_version()}, +#endif + dpcpp_version{get_dpcpp_version()} {} }; diff --git a/mpi/CMakeLists.txt b/mpi/CMakeLists.txt index c9f60e937ed..febad2f1b7e 100644 --- a/mpi/CMakeLists.txt +++ b/mpi/CMakeLists.txt @@ -1,6 +1,6 @@ find_package(MPI REQUIRED) -add_library(ginkgo_mpi $ "") +add_library(ginkgo_mpi) target_sources(ginkgo_mpi PRIVATE base/exception.cpp diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp index 63591e1d36f..dfbd958106b 100644 --- a/mpi/test/base/bindings.cpp +++ b/mpi/test/base/bindings.cpp @@ -45,12 +45,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "gtest-mpi-main.hpp" +#include #include #include #include #include #include + class MpiBindings : public ::testing::Test { protected: MpiBindings() : ref(gko::ReferenceExecutor::create()) {} diff --git a/mpi/test/base/communicator.cpp b/mpi/test/base/communicator.cpp index 92b343e5adf..9659598c8c6 100644 --- a/mpi/test/base/communicator.cpp +++ b/mpi/test/base/communicator.cpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "gtest-mpi-main.hpp" +#include #include #include #include diff --git a/mpi/test/base/exception_helpers.cpp b/mpi/test/base/exception_helpers.cpp index f0878b2081f..5411c1bafb1 100644 --- a/mpi/test/base/exception_helpers.cpp +++ b/mpi/test/base/exception_helpers.cpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "gtest-mpi-main.hpp" +#include #include #include From 518be63ce99dcb9c43e83989fd0c463079caca3b Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 29 Oct 2021 11:54:13 +0200 Subject: [PATCH 14/59] Fix test_install and MPI run as root --- .gitlab-ci.yml | 6 +++++- cmake/GinkgoConfig.cmake.in | 5 +++++ test/test_install/CMakeLists.txt | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1cc8683bbd8..7abb79d69a7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -31,6 +31,7 @@ include: BUILD_HIP: "OFF" BUILD_HWLOC: "ON" BUILD_MPI: "OFF" + MPI_AS_ROOT: "OFF" FAST_TESTS: "OFF" DPCPP_SINGLE_MODE: "OFF" MIXED_PRECISION: "ON" @@ -76,7 +77,7 @@ include: -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} - -DMPI_RUN_AS_ROOT=${MPI_RUN_AS_ROOT} + -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} @@ -567,6 +568,7 @@ build/nocuda/gcc/mpi/debug/shared: variables: <<: *default_variables BUILD_MPI: "ON" + MPI_AS_ROOT: "ON" BUILD_TYPE: "Debug" BUILD_SHARED_LIBS: "ON" @@ -579,6 +581,7 @@ build/nocuda/gcc/mpi/release/static: variables: <<: *default_variables BUILD_MPI: "ON" + MPI_AS_ROOT: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" @@ -593,6 +596,7 @@ build/cuda100/mpi/gcc/all/debug/shared: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_MPI: "ON" + MPI_AS_ROOT: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" FAST_TESTS: "ON" diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 3b984e2a33a..2c129dfaa6a 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -36,6 +36,7 @@ set(GINKGO_BUILD_REFERENCE @GINKGO_BUILD_REFERENCE@) set(GINKGO_BUILD_OMP @GINKGO_BUILD_OMP@) set(GINKGO_BUILD_CUDA @GINKGO_BUILD_CUDA@) set(GINKGO_BUILD_HIP @GINKGO_BUILD_HIP@) +set(GINKGO_BUILD_MPI @GINKGO_BUILD_MPI@) set(GINKGO_BUILD_DPCPP @GINKGO_BUILD_DPCPP@) set(GINKGO_DEVEL_TOOLS @GINKGO_DEVEL_TOOLS@) @@ -81,6 +82,10 @@ set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@) set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) +set(GINKGO_HAVE_MPI @GINKGO_HAVE_MPI@) +set(GINKGO_HAVE_CUDA_AWARE_MPI @GINKGO_HAVE_CUDA_AWARE_MPI@) +set(GINKGO_FORCE_CUDA_AWARE_MPI @GINKGO_FORCE_CUDA_AWARE_MPI@) + set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) set(GINKGO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@) diff --git a/test/test_install/CMakeLists.txt b/test/test_install/CMakeLists.txt index 467d09c8186..c635a62b25c 100644 --- a/test/test_install/CMakeLists.txt +++ b/test/test_install/CMakeLists.txt @@ -29,6 +29,10 @@ add_executable(test_install test_install.cpp) target_compile_features(test_install PUBLIC cxx_std_14) target_compile_definitions(test_install PRIVATE HAS_REFERENCE=${HAS_REFERENCE}) target_link_libraries(test_install PRIVATE Ginkgo::ginkgo) +if(GINKGO_BUILD_MPI) + find_package(MPI REQUIRED) + target_link_libraries(test_install PRIVATE MPI::MPI_CXX) +endif() if(GINKGO_BUILD_CUDA) enable_language(CUDA) From 64bd141693fed79a47b99c15b28b75c9b7db6d28 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 2 Nov 2021 10:50:39 +0100 Subject: [PATCH 15/59] Make init_finalize a singleton --- .../mpi-simple-solver/mpi-simple-solver.cpp | 4 ++-- include/ginkgo/core/base/mpi.hpp | 23 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/examples/mpi-simple-solver/mpi-simple-solver.cpp 
b/examples/mpi-simple-solver/mpi-simple-solver.cpp
index f150c37a96c..86d7b2f4aa1 100644
--- a/examples/mpi-simple-solver/mpi-simple-solver.cpp
+++ b/examples/mpi-simple-solver/mpi-simple-solver.cpp
@@ -47,9 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 int main(int argc, char* argv[])
 {
-    auto mpi_init_fin = gko::mpi::init_finalize(argc, argv);
+    auto mpi_init_fin = gko::mpi::init_finalize::get_instance(argc, argv);
     {
-        auto mpi_init_fin2 = gko::mpi::init_finalize(argc, argv);
+        auto mpi_init_fin2 = gko::mpi::init_finalize::get_instance(argc, argv);
 
         // Use some shortcuts. In Ginkgo, vectors are seen as a
         // gko::matrix::Dense with one column/one row. The advantage of this
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index d31c74a7e90..b1a990ea4e2 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -91,25 +91,24 @@ enum class op_type {
  */
 class init_finalize {
 public:
-    init_finalize(int& argc, char**& argv, const size_type num_threads = 1);
-
-    init_finalize() = delete;
-
-    init_finalize(init_finalize& other) = default;
-
-    init_finalize& operator=(const init_finalize& other) = default;
-
-    init_finalize(init_finalize&& other) = default;
-
-    init_finalize& operator=(init_finalize&& other) = default;
+    static init_finalize* get_instance(int& argc, char**& argv,
+                                       const size_type num_threads = 1)
+    {
+        static init_finalize instance(argc, argv, num_threads);
+        return &instance;
+    }
 
     static bool is_finalized();
 
     static bool is_initialized();
 
+private:
+    init_finalize(int& argc, char**& argv, const size_type num_threads = 1);
+
+    init_finalize() = delete;
+
     ~init_finalize();
 
-private:
     int num_args_;
     int required_thread_support_;
     int provided_thread_support_;
From 320ab30e8c1a96f73fdd9db5bc52e5b32dd6bd72 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Wed, 3 Nov 2021 18:24:45 +0100
Subject: [PATCH 16/59] Review WIP
---
 include/ginkgo/core/base/mpi.hpp | 52 ++++++++++++++++++-----------
 mpi/base/bindings.cpp | 57 ++++++++++++++++----------------
 mpi/base/bindings.hpp | 12 +++++++
 3 files changed, 72 insertions(+), 49 deletions(-)
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index b1a990ea4e2..6930f2f375a 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -56,10 +56,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
 
-template
-using array_manager = std::unique_ptr>;
-
-
 namespace gko {
 namespace mpi {
@@ -85,16 +81,29 @@ enum class op_type {
 /*
- * Class that allows an RAII of initialization and calls MPI_Finalize at the
- * end of its scope. Therefore this must be called before any of the MPI
+ * This enum specifies the threading type to be used when creating an MPI
+ * environment.
+ */
+enum class thread_type {
+    serialized = MPI_THREAD_SERIALIZED,
+    funneled = MPI_THREAD_FUNNELED,
+    single = MPI_THREAD_SINGLE,
+    multiple = MPI_THREAD_MULTIPLE
+};
+
+
+/*
+ * Class that sets up and finalizes MPI exactly once per program execution,
+ * using the singleton pattern. This must be called before any of the MPI
 * functions.
*/ class init_finalize { public: - static init_finalize* get_instance(int& argc, char**& argv, - const size_type num_threads = 1) + static init_finalize* get_instance( + int& argc, char**& argv, + const thread_type thread_t = thread_type::serialized) { - static init_finalize instance(argc, argv, num_threads); + static init_finalize instance(argc, argv, thread_t); return &instance; } @@ -103,7 +112,7 @@ class init_finalize { static bool is_initialized(); private: - init_finalize(int& argc, char**& argv, const size_type num_threads = 1); + init_finalize(int& argc, char**& argv, const thread_type thread_t); init_finalize() = delete; @@ -123,9 +132,11 @@ class init_finalize { */ class info { public: - info(); + info() : info_(MPI_INFO_NULL) {} + + explicit info(MPI_Info input); - info(MPI_Info input) { this->info_ = input; } + void create_default(); void remove(std::string key); @@ -150,16 +161,19 @@ class info { */ class request : public EnableSharedCreateMethod { public: - request(const int size) : req_(new MPI_Request[size]) {} + explicit request(const int size) : req_(new MPI_Request[size]) {} request() : req_(new MPI_Request[1]) {} + void free(MPI_Request* req); + ~request() { - if (req_) delete[] req_; + // this->free(this->req_); + delete[] req_; } - MPI_Request* get_requests() const { return req_; } + MPI_Request* get() const { return req_; } private: MPI_Request* req_; @@ -167,9 +181,8 @@ class request : public EnableSharedCreateMethod { /** - * A status class that takes in the given status and duplicates it - * for our purposes. As the class or object goes out of scope, the status - * is freed. + * A status class that allows creation of MPI_Status and + * frees the status array when it goes out of scope */ class status : public EnableSharedCreateMethod { public: @@ -275,8 +288,7 @@ class window { window(ValueType* base, unsigned int size, std::shared_ptr comm, - const int disp_unit = sizeof(ValueType), - info input_info = info(MPI_INFO_NULL), + const int disp_unit = sizeof(ValueType), info input_info = info(), win_type create_type = win_type::create); MPI_Win get() { return this->window_; } diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp index 9e89b98bda1..b0006131cce 100644 --- a/mpi/base/bindings.cpp +++ b/mpi/base/bindings.cpp @@ -67,11 +67,11 @@ bool init_finalize::is_finalized() init_finalize::init_finalize(int& argc, char**& argv, - const size_type num_threads) + const thread_type thread_t) { auto flag = is_initialized(); if (!flag) { - this->required_thread_support_ = MPI_THREAD_SERIALIZED; + this->required_thread_support_ = static_cast(thread_t); GKO_ASSERT_NO_MPI_ERRORS( MPI_Init_thread(&argc, &argv, this->required_thread_support_, &(this->provided_thread_support_))); @@ -167,10 +167,16 @@ communicator& communicator::operator=(communicator&& other) } +void request::free(MPI_Request* req) { bindings::free_requests(this->req_); } + + communicator::~communicator() { bindings::free_comm(this->comm_); } -info::info() { bindings::create_info(&this->info_); } +void info::create_default() { bindings::create_info(&this->info_); } + + +info::info(MPI_Info info) { bindings::duplicate_info(info, &this->info_); } void info::add(std::string key, std::string value) @@ -335,9 +341,9 @@ void synchronize(const communicator& comm) { bindings::barrier(comm.get()); } void wait(std::shared_ptr req, std::shared_ptr status) { if (status.get()) { - bindings::wait(req->get_requests(), status->get_statuses()); + bindings::wait(req->get(), status->get_statuses()); } else { - 
bindings::wait(req->get_requests(), MPI_STATUS_IGNORE); + bindings::wait(req->get(), MPI_STATUS_IGNORE); } } @@ -354,10 +360,9 @@ void send(const SendType* send_buffer, const int send_count, send_tag, comm ? comm->get() : communicator::get_comm_world()); } else { - bindings::i_send(send_buffer, send_count, send_type, destination_rank, - send_tag, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + bindings::i_send( + send_buffer, send_count, send_type, destination_rank, send_tag, + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -375,10 +380,9 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, comm ? comm->get() : communicator::get_comm_world(), MPI_STATUS_IGNORE); } else { - bindings::i_recv(recv_buffer, recv_count, recv_type, source_rank, - recv_tag, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + bindings::i_recv( + recv_buffer, recv_count, recv_type, source_rank, recv_tag, + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -396,7 +400,7 @@ void put(const PutType* origin_buffer, const int origin_count, } else { bindings::req_put(origin_buffer, origin_count, put_type, target_rank, target_disp, target_count, put_type, window.get(), - req->get_requests()); + req->get()); } } @@ -413,7 +417,7 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, } else { bindings::req_get(origin_buffer, origin_count, get_type, target_rank, target_disp, target_count, get_type, window.get(), - req->get_requests()); + req->get()); } } @@ -441,10 +445,9 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, operation, root_rank, comm ? comm->get() : communicator::get_comm_world()); } else { - bindings::i_reduce(send_buffer, recv_buffer, count, reduce_type, - operation, root_rank, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + bindings::i_reduce( + send_buffer, recv_buffer, count, reduce_type, operation, root_rank, + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -464,7 +467,7 @@ void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, bindings::i_all_reduce( bindings::in_place(), recv_buffer, count, reduce_type, operation, comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + req->get()); } } @@ -484,8 +487,7 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, } else { bindings::i_all_reduce( send_buffer, recv_buffer, count, reduce_type, operation, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -572,8 +574,7 @@ void all_to_all(RecvType* recv_buffer, const int recv_count, bindings::i_all_to_all( bindings::in_place(), recv_count, recv_type, recv_buffer, recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -595,8 +596,7 @@ void all_to_all(const SendType* send_buffer, const int send_count, bindings::i_all_to_all( send_buffer, send_count, send_type, recv_buffer, recv_count == 0 ? send_count : recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + comm ? 
comm->get() : communicator::get_comm_world(), req->get()); } } @@ -622,8 +622,7 @@ void all_to_all(const SendType* send_buffer, const int* send_counts, bindings::i_all_to_all_v( send_buffer, send_counts, send_offsets, send_type, recv_buffer, recv_counts, recv_offsets, recv_type, - comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + comm ? comm->get() : communicator::get_comm_world(), req->get()); } } @@ -641,7 +640,7 @@ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, } else { bindings::i_scan(send_buffer, recv_buffer, count, scan_type, operation, comm ? comm->get() : communicator::get_comm_world(), - req->get_requests()); + req->get()); } } diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp index f3e2ddb97ef..f073e30973d 100644 --- a/mpi/base/bindings.hpp +++ b/mpi/base/bindings.hpp @@ -221,6 +221,18 @@ inline void flush_all_local_windows(MPI_Win* win) } +inline void duplicate_info(MPI_Info in_info, MPI_Info* out_info) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_dup(in_info, out_info)); +} + + +inline void free_requests(MPI_Request* req) +{ + GKO_ASSERT_NO_MPI_ERRORS(MPI_Request_free(req)); +} + + inline void create_info(MPI_Info* info) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_create(info)); From 09ff8f584000b51054ea0f7d042945fd7b059180 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 4 Nov 2021 15:17:23 +0100 Subject: [PATCH 17/59] Review update 2: comm, req, and win --- include/ginkgo/core/base/mpi.hpp | 336 +++++++++++++++++++++++-------- mpi/base/bindings.cpp | 287 -------------------------- mpi/base/bindings.hpp | 205 ------------------- mpi/test/base/bindings.cpp | 98 ++++----- mpi/test/base/communicator.cpp | 66 ++---- 5 files changed, 318 insertions(+), 674 deletions(-) diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 6930f2f375a..20019a3f256 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -107,16 +107,41 @@ class init_finalize { return &instance; } - static bool is_finalized(); + static bool is_finalized() + { + int flag = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Finalized(&flag)); + return flag; + } - static bool is_initialized(); + static bool is_initialized() + { + int flag = 0; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Initialized(&flag)); + return flag; + } private: - init_finalize(int& argc, char**& argv, const thread_type thread_t); + init_finalize(int& argc, char**& argv, const thread_type thread_t) + { + auto flag = is_initialized(); + if (!flag) { + this->required_thread_support_ = static_cast(thread_t); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Init_thread(&argc, &argv, this->required_thread_support_, + &(this->provided_thread_support_))); + } else { + // GKO_MPI_INITIALIZED; + } + } init_finalize() = delete; - ~init_finalize(); + ~init_finalize() + { + auto flag = is_finalized(); + if (!flag) MPI_Finalize(); + } int num_args_; int required_thread_support_; @@ -134,19 +159,38 @@ class info { public: info() : info_(MPI_INFO_NULL) {} - explicit info(MPI_Info input); + explicit info(MPI_Info input_info) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_dup(input_info, &this->info_)); + } - void create_default(); + void create_default() + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_create(&this->info_)); + } - void remove(std::string key); + void remove(std::string key) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_delete(this->info_, key.c_str())); + } std::string& at(std::string& key) { return this->key_value_.at(key); } - void add(std::string key, std::string value); + void 
add(std::string key, std::string value) + { + this->key_value_[key] = value; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Info_set(this->info_, key.c_str(), value.c_str())); + } MPI_Info get() { return this->info_; } - ~info(); + ~info() + { + if (this->info_ != MPI_INFO_NULL) { + MPI_Info_free(&this->info_); + } + } private: std::map key_value_; @@ -165,7 +209,10 @@ class request : public EnableSharedCreateMethod { request() : req_(new MPI_Request[1]) {} - void free(MPI_Request* req); + void free(MPI_Request* req) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Request_free(req)); + } ~request() { @@ -195,7 +242,7 @@ class status : public EnableSharedCreateMethod { if (status_) delete[] status_; } - MPI_Status* get_statuses() const { return status_; } + MPI_Status* get() const { return status_; } private: MPI_Status* status_; @@ -209,19 +256,68 @@ class status : public EnableSharedCreateMethod { */ class communicator : public EnableSharedCreateMethod { public: - communicator(const MPI_Comm& comm); + communicator(const MPI_Comm& comm) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + } - communicator(const MPI_Comm& comm, int color, int key); + communicator(const MPI_Comm& comm, int color, int key) + { + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Comm_split(comm, color, key, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + } - communicator(); + communicator() + { + this->comm_ = MPI_COMM_NULL; + this->size_ = 0; + this->rank_ = -1; + } - communicator(communicator& other); + communicator(communicator& other) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + } - communicator& operator=(const communicator& other); + communicator& operator=(const communicator& other) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + return *this; + } - communicator(communicator&& other); + communicator(communicator&& other) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + other.comm_ = MPI_COMM_NULL; + other.size_ = 0; + other.rank_ = -1; + } - communicator& operator=(communicator&& other); + communicator& operator=(communicator&& other) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->local_rank_ = get_local_rank(); + other.size_ = 0; + other.rank_ = -1; + return *this; + } static MPI_Comm get_comm_world() { return MPI_COMM_WORLD; } @@ -238,32 +334,89 @@ class communicator : public EnableSharedCreateMethod { int local_rank() const { return local_rank_; }; - bool compare(const MPI_Comm& other) const; + bool compare(const MPI_Comm& other) const + { + int flag; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(this->comm_, other, &flag)); + return flag; + } bool operator==(const communicator& rhs) { return compare(rhs.get()); } - ~communicator(); + ~communicator() + { + if (this->comm_ && this->comm_ != MPI_COMM_NULL) { + MPI_Comm_free(&this->comm_); + } + } private: MPI_Comm comm_; int size_{}; int rank_{}; int local_rank_{}; + + int get_my_rank() + { + int my_rank = 0; + 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(comm_, &my_rank));
+        return my_rank;
+    }
+
+    int get_local_rank()
+    {
+        MPI_Comm local_comm;
+        int rank;
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type(
+            comm_, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm));
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &rank));
+        MPI_Comm_free(&local_comm);
+        return rank;
+    }
+
+    int get_num_ranks()
+    {
+        int size = 1;
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm_, &size));
+        return size;
+    }
 };
 
 
 /**
- * A type helper which can be used to create MPI_Datatype from other types.
+ * Get the current wall time via MPI_Wtime.
+ */
+double get_walltime() { return MPI_Wtime(); }
+
+
+/**
+ * This function is used to synchronize between the ranks of a given
+ * communicator.
+ *
+ * @param comm the communicator
  */
-class mpi_type {
-public:
-    mpi_type(const int count, MPI_Datatype& old);
-    ~mpi_type();
-    const MPI_Datatype& get() const { return this->type_; }
+void synchronize(const communicator& comm = communicator::get_comm_world())
+{
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm.get()));
+}
 
-private:
-    MPI_Datatype type_{};
-};
+
+/**
+ * Allows a rank to wait on a particular request handle.
+ *
+ * @param req The request to wait on.
+ * @param status The status variable that can be queried.
+ */
+void wait(std::shared_ptr<request> req, std::shared_ptr<status> status = {})
+{
+    if (status.get()) {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(req->get(), status->get()));
+    } else {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(req->get(), MPI_STATUS_IGNORE));
+    }
+}
 
 
 /**
@@ -289,80 +442,93 @@ class window {
     window(ValueType* base, unsigned int size,
            std::shared_ptr comm,
            const int disp_unit = sizeof(ValueType), info input_info = info(),
-           win_type create_type = win_type::create);
+           win_type create_type = win_type::create)
+    {
+        if (create_type == win_type::create) {
+            GKO_ASSERT_NO_MPI_ERRORS(
+                MPI_Win_create(base, size, disp_unit, input_info.get(),
+                               comm->get(), &this->window_));
+        } else if (create_type == win_type::dynamic_create) {
+            GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create_dynamic(
+                input_info.get(), comm->get(), &this->window_));
+        } else if (create_type == win_type::allocate) {
+            GKO_ASSERT_NO_MPI_ERRORS(
+                MPI_Win_allocate(size, disp_unit, input_info.get(), comm->get(),
+                                 base, &this->window_));
+        } else {
+            GKO_NOT_IMPLEMENTED;
+        }
+    }
 
     MPI_Win get() { return this->window_; }
 
-    void fence(int assert = 0);
+    void fence(int assert = 0)
+    {
+        if (&this->window_) {
+            GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_fence(assert, this->window_));
+        }
+    }
 
-    void lock(int rank, int assert = 0, lock_type lock_t = lock_type::shared);
+    void lock(int rank, int assert = 0, lock_type lock_t = lock_type::shared)
+    {
+        if (lock_t == lock_type::shared) {
+            GKO_ASSERT_NO_MPI_ERRORS(
+                MPI_Win_lock(MPI_LOCK_SHARED, rank, assert, this->window_));
+        } else if (lock_t == lock_type::exclusive) {
+            GKO_ASSERT_NO_MPI_ERRORS(
+                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, assert, this->window_));
+        } else {
+            GKO_NOT_IMPLEMENTED;
+        }
+    }
 
-    void unlock(int rank);
+    void unlock(int rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, this->window_));
+    }
 
-    void lock_all(int assert = 0);
+    void lock_all(int assert = 0)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, this->window_));
+    }
 
-    void unlock_all();
+    void unlock_all()
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(this->window_));
+    }
 
-    void flush(int rank);
+    void flush(int rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, this->window_));
+    }
 
-    void
void flush_local(int rank); + void flush_local(int rank) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, this->window_)); + } - void flush_all(); + void flush_all() + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(this->window_)); + } - void flush_all_local(); + void flush_all_local() + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(this->window_)); + } - ~window(); + ~window() + { + if (this->window_ && this->window_ != MPI_WIN_NULL) { + MPI_Win_free(&this->window_); + } + } private: MPI_Win window_; }; -/** - * This function is used to synchronize between the ranks of a given - * communicator. - * - * @param comm the communicator - */ -void synchronize(const communicator& comm = communicator::get_comm_world()); - - -/** - * Allows a rank to wait on a particular request handle. - * - * @param req The request to wait on. - * @param status The status variable that can be queried. - */ -void wait(std::shared_ptr req, std::shared_ptr status = {}); - - -double get_walltime(); - - -/** - * Get the rank in the communicator of the calling process. - * - * @param comm the communicator - */ -int get_my_rank(const communicator& comm = communicator::get_comm_world()); - - -/** - * Get the node local rank in the communicator of the calling process. - * - * @param comm the communicator - */ -int get_local_rank(const communicator& comm = communicator::get_comm_world()); - - -/** - * Get the number of ranks in the communicator of the calling process. - * - * @param comm the communicator - */ -int get_num_ranks(const communicator& comm = communicator::get_comm_world()); - - /** * Send data from calling process to destination rank. * diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp index b0006131cce..14a7ecf005d 100644 --- a/mpi/base/bindings.cpp +++ b/mpi/base/bindings.cpp @@ -50,259 +50,6 @@ namespace gko { namespace mpi { -bool init_finalize::is_initialized() -{ - int flag = 0; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Initialized(&flag)); - return flag; -} - - -bool init_finalize::is_finalized() -{ - int flag = 0; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Finalized(&flag)); - return flag; -} - - -init_finalize::init_finalize(int& argc, char**& argv, - const thread_type thread_t) -{ - auto flag = is_initialized(); - if (!flag) { - this->required_thread_support_ = static_cast(thread_t); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Init_thread(&argc, &argv, this->required_thread_support_, - &(this->provided_thread_support_))); - } else { - // GKO_MPI_INITIALIZED; - } -} - - -init_finalize::~init_finalize() -{ - auto flag = is_finalized(); - if (!flag) MPI_Finalize(); -} - - -mpi_type::mpi_type(const int count, MPI_Datatype& old) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_contiguous(count, old, &this->type_)); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_commit(&this->type_)); -} - - -mpi_type::~mpi_type() { MPI_Type_free(&(this->type_)); } - - -communicator::communicator(const MPI_Comm& comm) -{ - this->comm_ = bindings::duplicate_comm(comm); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); -} - - -communicator::communicator(const MPI_Comm& comm_in, int color, int key) -{ - this->comm_ = bindings::create_comm(comm_in, color, key); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); -} - - -communicator::communicator() -{ - this->comm_ = MPI_COMM_NULL; - this->size_ = 0; - this->rank_ = -1; -} 
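The window class completed above pairs with the free functions put/get for one-sided access. A minimal sketch of the lock_all/put pattern, mirroring the CanPutValuesWithLockAll test further down in mpi/test/base/bindings.cpp (data, my_rank, target_rank and the shared_ptr comm are assumed to be set up as in that test):

    // Expose four local ints to every rank in comm.
    gko::mpi::window<int> win(data, 4 * sizeof(int), comm);
    win.lock_all();
    if (my_rank == 0) {
        // One-sided write of four elements into the target's window.
        gko::mpi::put(data, 4, target_rank, 0, 4, win);
        win.flush(target_rank);
    }
    win.unlock_all();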
- - -communicator::communicator(communicator& other) -{ - this->comm_ = bindings::duplicate_comm(other.comm_); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); -} - - -communicator& communicator::operator=(const communicator& other) -{ - this->comm_ = bindings::duplicate_comm(other.comm_); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); - return *this; -} - - -communicator::communicator(communicator&& other) -{ - this->comm_ = bindings::duplicate_comm(other.comm_); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); - other.comm_ = MPI_COMM_NULL; - other.size_ = 0; - other.rank_ = -1; -} - - -communicator& communicator::operator=(communicator&& other) -{ - this->comm_ = bindings::duplicate_comm(other.comm_); - this->size_ = bindings::get_comm_size(this->comm_); - this->rank_ = bindings::get_comm_rank(this->comm_); - this->local_rank_ = bindings::get_local_rank(this->comm_); - other.size_ = 0; - other.rank_ = -1; - return *this; -} - - -void request::free(MPI_Request* req) { bindings::free_requests(this->req_); } - - -communicator::~communicator() { bindings::free_comm(this->comm_); } - - -void info::create_default() { bindings::create_info(&this->info_); } - - -info::info(MPI_Info info) { bindings::duplicate_info(info, &this->info_); } - - -void info::add(std::string key, std::string value) -{ - this->key_value_[key] = value; - bindings::add_info_key_value_pair(&this->info_, key.c_str(), value.c_str()); -} - - -void info::remove(std::string key) -{ - bindings::remove_info_key_value_pair(&this->info_, key.c_str()); -} - - -info::~info() -{ - if (this->info_ != MPI_INFO_NULL) bindings::free_info(&this->info_); -} - - -bool communicator::compare(const MPI_Comm& comm) const -{ - return bindings::compare_comm(this->comm_, comm); -} - - -template -window::window(ValueType* base, unsigned int size, - std::shared_ptr comm, - const int disp_unit, info input_info, - win_type create_type) -{ - if (create_type == win_type::create) { - bindings::create_window(base, size, disp_unit, input_info.get(), - comm->get(), &this->window_); - } else if (create_type == win_type::dynamic_create) { - bindings::create_dynamic_window(input_info.get(), comm->get(), - &this->window_); - } else if (create_type == win_type::allocate) { - bindings::allocate_window(size, disp_unit, input_info.get(), - comm->get(), base, &this->window_); - } else { - GKO_NOT_IMPLEMENTED; - } -} - - -template -void window::fence(int assert) -{ - bindings::fence_window(assert, &this->window_); -} - - -template -void window::lock(int rank, int assert, lock_type lock_t) -{ - if (lock_t == lock_type::shared) { - bindings::lock_window(MPI_LOCK_SHARED, rank, assert, &this->window_); - } else if (lock_t == lock_type::exclusive) { - bindings::lock_window(MPI_LOCK_EXCLUSIVE, rank, assert, &this->window_); - } else { - GKO_NOT_IMPLEMENTED; - } -} - - -template -void window::unlock(int rank) -{ - bindings::unlock_window(rank, &this->window_); -} - - -template -void window::lock_all(int assert) -{ - bindings::lock_all_windows(assert, &this->window_); -} - - -template -void window::unlock_all() -{ - bindings::unlock_all_windows(&this->window_); -} - - -template -void window::flush(int rank) -{ - 
bindings::flush_window(rank, &this->window_); -} - - -template -void window::flush_local(int rank) -{ - bindings::flush_local_window(rank, &this->window_); -} - - -template -void window::flush_all() -{ - bindings::flush_all_windows(&this->window_); -} - - -template -void window::flush_all_local() -{ - bindings::flush_all_local_windows(&this->window_); -} - - -template -window::~window() -{ - if (this->window_ && this->window_ != MPI_WIN_NULL) { - bindings::free_window(&this->window_); - } -} - - MPI_Op create_operation( const std::function func, void* arg1, void* arg2, int* len, MPI_Datatype* type) @@ -314,40 +61,6 @@ MPI_Op create_operation( } -double get_walltime() { return bindings::get_walltime(); } - - -int get_my_rank(const communicator& comm) -{ - return bindings::get_comm_rank(comm.get()); -} - - -int get_local_rank(const communicator& comm) -{ - return bindings::get_local_rank(comm.get()); -} - - -int get_num_ranks(const communicator& comm) -{ - return bindings::get_num_ranks(comm.get()); -} - - -void synchronize(const communicator& comm) { bindings::barrier(comm.get()); } - - -void wait(std::shared_ptr req, std::shared_ptr status) -{ - if (status.get()) { - bindings::wait(req->get(), status->get_statuses()); - } else { - bindings::wait(req->get(), MPI_STATUS_IGNORE); - } -} - - template void send(const SendType* send_buffer, const int send_count, const int destination_rank, const int send_tag, diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp index f073e30973d..2f25abe1213 100644 --- a/mpi/base/bindings.hpp +++ b/mpi/base/bindings.hpp @@ -58,217 +58,12 @@ namespace mpi { namespace bindings { -inline double get_walltime() { return MPI_Wtime(); } - - -inline int get_comm_size(const MPI_Comm& comm) -{ - int size = 0; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm, &size)); - return size; -} - - -inline int get_comm_rank(const MPI_Comm& comm) -{ - int my_rank = 0; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(comm, &my_rank)); - return my_rank; -} - - -inline int get_local_rank(const MPI_Comm& comm) -{ - MPI_Comm local_comm; - int rank; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, - MPI_INFO_NULL, &local_comm)); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &rank)); - MPI_Comm_free(&local_comm); - return rank; -} - - -inline int get_num_ranks(const MPI_Comm& comm) -{ - int size = 1; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm, &size)); - return size; -} - - -inline void barrier(const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm)); -} - - -inline MPI_Comm create_comm(const MPI_Comm& comm_in, int color, int key) -{ - MPI_Comm comm_out; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm_in, color, key, &comm_out)); - return comm_out; -} - - -inline MPI_Comm duplicate_comm(const MPI_Comm& comm) -{ - MPI_Comm dup; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm, &dup)); - return dup; -} - - -inline bool compare_comm(const MPI_Comm& comm1, const MPI_Comm comm2) -{ - int flag; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(comm1, comm2, &flag)); - return flag; -} - - -inline void free_comm(MPI_Comm comm) -{ - if (comm && comm != MPI_COMM_NULL) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(&comm)); - } -} - - -inline void create_window(void* base, unsigned int size, const int disp_unit, - MPI_Info info, const MPI_Comm comm, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Win_create(base, size, disp_unit, info, comm, win)); -} - - -inline void create_dynamic_window(MPI_Info info, const MPI_Comm comm, - MPI_Win* win) -{ - 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create_dynamic(info, comm, win)); -} - - -inline void allocate_window(unsigned int size, const int disp_unit, - MPI_Info info, const MPI_Comm comm, void* base, - MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Win_allocate(size, disp_unit, info, comm, base, win)); -} - - -inline void free_window(MPI_Win* win) -{ - if (win) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_free(win)); - } -} - - -inline void fence_window(int assert, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_fence(assert, *win)); -} - - -inline void lock_window(int lock_t, int rank, int assert, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock(lock_t, rank, assert, *win)); -} - - -inline void unlock_window(int rank, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, *win)); -} - - -inline void lock_all_windows(int assert, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, *win)); -} - - -inline void unlock_all_windows(MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(*win)); -} - - -inline void flush_window(int rank, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, *win)); -} - - -inline void flush_local_window(int rank, MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, *win)); -} - - -inline void flush_all_windows(MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(*win)); -} - - -inline void flush_all_local_windows(MPI_Win* win) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(*win)); -} - - -inline void duplicate_info(MPI_Info in_info, MPI_Info* out_info) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_dup(in_info, out_info)); -} - - -inline void free_requests(MPI_Request* req) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Request_free(req)); -} - - -inline void create_info(MPI_Info* info) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_create(info)); -} - - -inline void add_info_key_value_pair(MPI_Info* info, const char* key, - const char* value) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_set(*info, key, value)); -} - - -inline void remove_info_key_value_pair(MPI_Info* info, const char* key) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_delete(*info, key)); -} - - -inline void free_info(MPI_Info* info) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_free(info)); -} - - inline void create_op(MPI_User_function* func, int commute, MPI_Op* op) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Op_create(func, commute, op)); } -inline void wait(MPI_Request* request, MPI_Status* status) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(request, status)); -} - template inline const T* in_place() { diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp index dfbd958106b..8cfaa6e207c 100644 --- a/mpi/test/base/bindings.cpp +++ b/mpi/test/base/bindings.cpp @@ -82,7 +82,7 @@ TEST_F(MpiBindings, CanCreateWindow) using ValueType = int; ValueType* data; data = new ValueType[4]{1, 2, 3, 4}; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto win = gko::mpi::window(data, 4 * sizeof(ValueType), comm); ASSERT_NE(win.get(), MPI_WIN_NULL); win.lock_all(); @@ -94,9 +94,9 @@ TEST_F(MpiBindings, CanCreateWindow) TEST_F(MpiBindings, CanSendAndRecvValues) { using ValueType = int; - auto comm = gko::mpi::communicator(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm); - auto num_ranks = gko::mpi::get_num_ranks(comm); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; 
ValueType* data; @@ -126,9 +126,9 @@ TEST_F(MpiBindings, CanSendAndRecvValues) TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) { using ValueType = int; - auto comm = gko::mpi::communicator(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm); - auto num_ranks = gko::mpi::get_num_ranks(comm); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; ValueType* data; @@ -162,7 +162,7 @@ TEST_F(MpiBindings, CanPutValuesWithLockAll) { using ValueType = int; using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); int* data; @@ -195,7 +195,7 @@ TEST_F(MpiBindings, CanPutValuesWithExclusiveLock) { using ValueType = int; using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); int* data; @@ -228,7 +228,7 @@ TEST_F(MpiBindings, CanPutValuesWithFence) { using ValueType = int; using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; @@ -262,7 +262,7 @@ TEST_F(MpiBindings, CanGetValuesWithLockAll) { using ValueType = int; using Window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; @@ -297,7 +297,7 @@ TEST_F(MpiBindings, CanGetValuesWithExclusiveLock) { using ValueType = int; using Window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; @@ -332,7 +332,7 @@ TEST_F(MpiBindings, CanGetValuesWithFence) { using ValueType = int; using Window = gko::mpi::window; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); + auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; @@ -364,9 +364,9 @@ TEST_F(MpiBindings, CanGetValuesWithFence) TEST_F(MpiBindings, CanBroadcastValues) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); double* data; auto array = gko::Array{ref, 8}; if (my_rank == 0) { @@ -395,9 +395,9 @@ TEST_F(MpiBindings, CanBroadcastValues) TEST_F(MpiBindings, CanReduceValues) { using ValueType = double; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); ValueType data, sum, max, min; if (my_rank == 0) { data = 3; @@ -421,9 +421,9 @@ TEST_F(MpiBindings, CanReduceValues) TEST_F(MpiBindings, CanAllReduceValues) { - auto comm = 
gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); int data, sum; if (my_rank == 0) { data = 3; @@ -441,9 +441,9 @@ TEST_F(MpiBindings, CanAllReduceValues) TEST_F(MpiBindings, CanAllReduceValuesInPlace) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); int data; if (my_rank == 0) { data = 3; @@ -461,9 +461,9 @@ TEST_F(MpiBindings, CanAllReduceValuesInPlace) TEST_F(MpiBindings, CanScatterValues) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); double* data; auto scatter_from_array = gko::Array{ref->get_master()}; if (my_rank == 0) { @@ -497,9 +497,9 @@ TEST_F(MpiBindings, CanScatterValues) TEST_F(MpiBindings, CanGatherValues) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); int data; if (my_rank == 0) { data = 3; @@ -524,9 +524,9 @@ TEST_F(MpiBindings, CanGatherValues) TEST_F(MpiBindings, CanScatterValuesWithDisplacements) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); double* data; auto scatter_from_array = gko::Array{ref}; auto scatter_into_array = gko::Array{ref}; @@ -579,9 +579,9 @@ TEST_F(MpiBindings, CanScatterValuesWithDisplacements) TEST_F(MpiBindings, CanGatherValuesWithDisplacements) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); double* data; auto gather_from_array = gko::Array{ref}; auto gather_into_array = gko::Array{ref}; @@ -642,9 +642,9 @@ TEST_F(MpiBindings, CanGatherValuesWithDisplacements) TEST_F(MpiBindings, AllToAllWorksCorrectly) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; auto ref_array = gko::Array{ref}; @@ -671,9 +671,9 @@ TEST_F(MpiBindings, AllToAllWorksCorrectly) TEST_F(MpiBindings, AllToAllInPlaceWorksCorrectly) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); 
+ auto my_rank = comm->rank(); + auto num_ranks = comm->size(); auto recv_array = gko::Array{ref}; auto ref_array = gko::Array{ref}; recv_array = gko::Array{ref, 4}; @@ -698,9 +698,9 @@ TEST_F(MpiBindings, AllToAllInPlaceWorksCorrectly) TEST_F(MpiBindings, AllToAllVWorksCorrectly) { - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; auto ref_array = gko::Array{ref}; @@ -753,9 +753,9 @@ TEST_F(MpiBindings, AllToAllVWorksCorrectly) TEST_F(MpiBindings, CanScanValues) { using ValueType = double; - auto comm = gko::mpi::communicator::create(MPI_COMM_WORLD); - auto my_rank = gko::mpi::get_my_rank(comm->get()); - auto num_ranks = gko::mpi::get_num_ranks(comm->get()); + auto comm = gko::mpi::communicator::create_world(); + auto my_rank = comm->rank(); + auto num_ranks = comm->size(); ValueType data, sum, max, min; if (my_rank == 0) { data = 3; diff --git a/mpi/test/base/communicator.cpp b/mpi/test/base/communicator.cpp index 9659598c8c6..9fdd2e2a471 100644 --- a/mpi/test/base/communicator.cpp +++ b/mpi/test/base/communicator.cpp @@ -58,8 +58,8 @@ class Communicator : public ::testing::Test { void SetUp() { - rank = gko::mpi::get_my_rank(comm); - ASSERT_EQ(gko::mpi::get_num_ranks(comm), 8); + rank = comm.rank(); + ASSERT_EQ(comm.size(), 8); } gko::mpi::communicator comm; @@ -95,20 +95,18 @@ TEST_F(Communicator, KnowsItsCommunicator) TEST_F(Communicator, CommunicatorCanBeCopied) { - auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); - auto copy = comm_world; + auto copy = comm; - EXPECT_EQ(comm_world.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(comm.compare(MPI_COMM_WORLD), true); EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); } TEST_F(Communicator, CommunicatorCanBeCopyConstructed) { - auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); - auto copy = gko::mpi::communicator(comm_world); + auto copy = gko::mpi::communicator(comm); - EXPECT_EQ(comm_world.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(comm.compare(MPI_COMM_WORLD), true); EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); } @@ -116,28 +114,25 @@ TEST_F(Communicator, CommunicatorCanBeCopyConstructed) TEST_F(Communicator, CommunicatorCanBeMoved) { int size; - auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); - + auto comm_world = gko::mpi::communicator::create_world(); auto moved = std::move(comm_world); MPI_Comm_size(MPI_COMM_WORLD, &size); - EXPECT_EQ(comm_world.get(), MPI_COMM_NULL); - EXPECT_EQ(comm_world.size(), 0); - EXPECT_EQ(moved.compare(MPI_COMM_WORLD), true); - EXPECT_EQ(moved.size(), size); + EXPECT_EQ(comm_world, nullptr); + EXPECT_EQ(moved->compare(MPI_COMM_WORLD), true); + EXPECT_EQ(moved->size(), size); } TEST_F(Communicator, CommunicatorCanBeMoveConstructed) { int size; - auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); - - auto moved = gko::mpi::communicator(std::move(comm_world)); + auto comm_world = gko::mpi::communicator::create_world(); + auto moved = gko::mpi::communicator(std::move(*comm_world.get())); MPI_Comm_size(MPI_COMM_WORLD, &size); - EXPECT_EQ(comm_world.get(), MPI_COMM_NULL); - EXPECT_EQ(comm_world.size(), 0); + EXPECT_EQ(comm_world->get(), MPI_COMM_NULL); + EXPECT_EQ(comm_world->size(), 0); EXPECT_EQ(moved.compare(MPI_COMM_WORLD), true); EXPECT_EQ(moved.size(), size); } @@ -147,26 
+142,15 @@ TEST_F(Communicator, CommKnowsItsSize) { int size; MPI_Comm_size(MPI_COMM_WORLD, &size); - auto comm = gko::mpi::communicator(MPI_COMM_WORLD); EXPECT_EQ(comm.size(), size); } -TEST_F(Communicator, KnowsItsSize) -{ - int size; - MPI_Comm_size(MPI_COMM_WORLD, &size); - - EXPECT_EQ(gko::mpi::get_num_ranks(MPI_COMM_WORLD), size); -} - - TEST_F(Communicator, CommKnowsItsRank) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - auto comm = gko::mpi::communicator(MPI_COMM_WORLD); EXPECT_EQ(comm.rank(), rank); } @@ -176,7 +160,6 @@ TEST_F(Communicator, CommKnowsItsLocalRank) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - auto comm = gko::mpi::communicator(MPI_COMM_WORLD); // Expect local rank to be same as rank when on one node EXPECT_EQ(comm.local_rank(), rank); @@ -188,32 +171,19 @@ TEST_F(Communicator, KnowsItsRanks) int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - EXPECT_EQ(rank, gko::mpi::get_my_rank(MPI_COMM_WORLD)); -} - - -TEST_F(Communicator, KnowsItsDefaultCommunicator) -{ - auto comm_world = gko::mpi::communicator(MPI_COMM_WORLD); - ASSERT_TRUE(comm_world == comm); -} - - -TEST_F(Communicator, KnowsNumRanks) -{ - EXPECT_EQ(gko::mpi::get_num_ranks(comm), 8); + EXPECT_EQ(comm.rank(), rank); } TEST_F(Communicator, CanSetCustomCommunicator) { - auto world_rank = gko::mpi::get_my_rank(comm); - auto world_size = gko::mpi::get_num_ranks(comm); + auto world_rank = comm.rank(); + auto world_size = comm.size(); auto color = world_rank / 4; auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); for (auto i = 0; i < world_size; ++i) { - EXPECT_LT(gko::mpi::get_my_rank(row_comm.get()), 4); + EXPECT_LT(row_comm.rank(), 4); } } From ff15e67f37c04668c9caa5411583acabdcfcabba Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 4 Nov 2021 17:24:41 +0100 Subject: [PATCH 18/59] WIP: Moving funcs to public header --- include/ginkgo/core/base/mpi.hpp | 370 +++++++++++++++++++++++++--- mpi/base/bindings.cpp | 308 ----------------------- mpi/base/bindings.hpp | 144 ----------- mpi/base/helpers.hpp | 44 +--- mpi/test/base/bindings.cpp | 8 +- mpi/test/base/communicator.cpp | 5 - mpi/test/base/exception_helpers.cpp | 1 - 7 files changed, 341 insertions(+), 539 deletions(-) diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 20019a3f256..55004c7109f 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -528,9 +528,39 @@ class window { MPI_Win window_; }; +namespace detail { + +template +inline const T* in_place() +{ + return reinterpret_cast(MPI_IN_PLACE); +} + +} // namespace detail + /** - * Send data from calling process to destination rank. + * Send (Blocking) data from calling process to destination rank. + * + * @param send_buffer the buffer to send + * @param send_count the number of elements to send + * @param destination_rank the rank to send the data to + * @param send_tag the tag for the send call + * @param comm the communicator + */ +template +void send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag, + std::shared_ptr comm) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, send_type, + destination_rank, send_tag, comm->get())); +} + + +/** + * Send (Non-blocking) data from calling process to destination rank. 
 *
 * @param send_buffer the buffer to send
 * @param send_count the number of elements to send
@@ -542,8 +572,36 @@
  * @param destination_rank the rank to send the data to
  * @param send_tag the tag for the send call
  * @param req the request handle for the send call
  * @param comm the communicator
  */
 template <typename SendType>
 void send(const SendType* send_buffer, const int send_count,
           const int destination_rank, const int send_tag,
-          std::shared_ptr<request> req = {},
-          std::shared_ptr<const communicator> comm = {});
+          std::shared_ptr<request> req,
+          std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, send_type,
+                                       destination_rank, send_tag,
+                                       comm->get(), req->get()));
+}
+
+
+/**
+ * Receive (Blocking) data from source rank.
+ *
+ * @param recv_buffer the buffer to receive into
+ * @param recv_count the number of elements to receive
+ * @param source_rank the rank to receive the data from
+ * @param recv_tag the tag for the receive call
+ * @param status the status that can be queried after completion
+ * @param comm the communicator
+ */
+template <typename RecvType>
+void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
+          const int recv_tag, std::shared_ptr<status> status,
+          std::shared_ptr<const communicator> comm)
+{
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(
+        MPI_Recv(recv_buffer, recv_count, recv_type, source_rank, recv_tag,
+                 comm->get(), status ? status->get() : MPI_STATUS_IGNORE));
+}


 /**
@@ -558,9 +616,36 @@
  */
 template <typename RecvType>
 void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
-          const int recv_tag, std::shared_ptr<request> req = {},
-          std::shared_ptr<status> status = {},
-          std::shared_ptr<const communicator> comm = {});
+          const int recv_tag, std::shared_ptr<request> req,
+          std::shared_ptr<const communicator> comm)
+{
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, recv_type,
+                                       source_rank, recv_tag, comm->get(),
+                                       req->get()));
+}
+
+
+/**
+ * Put data into the target window.
+ *
+ * @param origin_buffer the buffer to send
+ * @param origin_count the number of elements to put
+ * @param target_rank the rank to put the data to
+ * @param target_disp the displacement at the target window
+ * @param target_count the number of elements to put at the target
+ * @param window the window to put the data into
+ */
+template <typename PutType>
+void put(const PutType* origin_buffer, const int origin_count,
+         const int target_rank, const unsigned int target_disp,
+         const int target_count, window<PutType>& window)
+{
+    auto put_type = helpers::get_mpi_type(origin_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Put(origin_buffer, origin_count, put_type,
+                                     target_rank, target_disp, target_count,
+                                     put_type, window.get()));
+}


 /**
@@ -578,7 +663,35 @@
 template <typename PutType>
 void put(const PutType* origin_buffer, const int origin_count,
          const int target_rank, const unsigned int target_disp,
          const int target_count, window<PutType>& window,
-         std::shared_ptr<request> req = {});
+         std::shared_ptr<request> req)
+{
+    auto put_type = helpers::get_mpi_type(origin_buffer[0]);
+    bindings::req_put(origin_buffer, origin_count, put_type, target_rank,
+                      target_disp, target_count, put_type, window.get(),
+                      req->get());
+}
+
+
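The request wrapper ties these non-blocking calls together with the wait helper defined earlier in this header. A hypothetical two-rank exchange (buffer, count and tag are placeholders; create() is provided by EnableSharedCreateMethod):

    auto req = gko::mpi::request::create();
    if (comm->rank() == 0) {
        gko::mpi::send(buffer, count, 1, tag, req, comm);
    } else if (comm->rank() == 1) {
        gko::mpi::recv(buffer, count, 0, tag, req, comm);
    }
    // Block until the posted operation has completed.
    gko::mpi::wait(req);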
+/**
+ * Get data from the target window.
+ *
+ * @param origin_buffer the buffer to get the data into
+ * @param origin_count the number of elements to get
+ * @param target_rank the rank to get the data from
+ * @param target_disp the displacement at the target window
+ * @param target_count the number of elements to get from the target
+ * @param window the window to get the data from
+ */
+template <typename GetType>
+void get(GetType* origin_buffer, const int origin_count, const int target_rank,
+         const unsigned int target_disp, const int target_count,
+         window<GetType>& window)
+{
+    auto get_type = helpers::get_mpi_type(origin_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Get(origin_buffer, origin_count, get_type,
+                                     target_rank, target_disp, target_count,
+                                     get_type, window.get()));
+}


 /**
@@ -595,7 +708,13 @@ void put(const PutType* origin_buffer, const int origin_count,
 template <typename GetType>
 void get(GetType* origin_buffer, const int origin_count, const int target_rank,
          const unsigned int target_disp, const int target_count,
-         window<GetType>& window, std::shared_ptr<request> req = {});
+         window<GetType>& window, std::shared_ptr<request> req)
+{
+    auto get_type = helpers::get_mpi_type(origin_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(origin_buffer, origin_count, get_type,
+                                      target_rank, target_disp, target_count,
+                                      get_type, window.get(), req->get()));
+}


 /**
@@ -608,7 +727,35 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank,
  */
 template <typename BroadcastType>
 void broadcast(BroadcastType* buffer, int count, int root_rank,
-               std::shared_ptr<const communicator> comm = {});
+               std::shared_ptr<const communicator> comm)
+{
+    auto bcast_type = helpers::get_mpi_type(buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(
+        MPI_Bcast(buffer, count, bcast_type, root_rank, comm->get()));
+}
+
+
+/**
+ * Reduce data into root from all calling processes on the same communicator.
+ *
+ * @param send_buffer the buffer to reduce
+ * @param recv_buffer the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param comm the communicator
+ */
+template <typename ReduceType>
+void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
+            op_type op_enum, int root_rank,
+            std::shared_ptr<const communicator> comm)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(send_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count,
+                                        reduce_type, operation, root_rank,
+                                        comm->get()));
+}


 /**
@@ -623,9 +770,37 @@ void broadcast(BroadcastType* buffer, int count, int root_rank,
  */
 template <typename ReduceType>
 void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
-            op_type op_enum, int root_rank,
-            std::shared_ptr<const communicator> comm = {},
-            std::shared_ptr<request> req = {});
+            op_type op_enum, int root_rank,
+            std::shared_ptr<const communicator> comm,
+            std::shared_ptr<request> req)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(send_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count,
+                                         reduce_type, operation, root_rank,
+                                         comm->get(), req->get()));
+}
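Together, reduce and broadcast cover the common "combine at the root, then redistribute" pattern. A short sketch under the same assumptions as above (local is a hypothetical per-rank value, rank 0 the root):

    double local = static_cast<double>(comm->rank() + 1);
    double total = 0.0;
    // Every rank contributes; only the root receives the combined value.
    gko::mpi::reduce(&local, &total, 1, gko::mpi::op_type::sum, 0, comm);
    // The root then shares the result with all ranks.
    gko::mpi::broadcast(&total, 1, 0, comm);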
+
+
+/**
+ * Reduce data from all calling processes on the same communicator.
+ *
+ * @param recv_buffer the data to reduce and the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param comm the communicator
+ */
+template <typename ReduceType>
+void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum,
+                std::shared_ptr<const communicator> comm)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(detail::in_place<ReduceType>(),
+                                           recv_buffer, count, reduce_type,
+                                           operation, comm->get()));
+}


 /**
@@ -639,10 +814,16 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
  * @param req the request handle
  */
 template <typename ReduceType>
-void all_reduce(ReduceType* recv_buffer, int count,
-                op_type op_enum = op_type::sum,
-                std::shared_ptr<const communicator> comm = {},
-                std::shared_ptr<request> req = {});
+void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum,
+                std::shared_ptr<const communicator> comm,
+                std::shared_ptr<request> req)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(
+        MPI_Iallreduce(detail::in_place<ReduceType>(), recv_buffer, count,
+                       reduce_type, operation, comm->get(), req->get()));
+}


 /**
@@ -658,9 +839,39 @@ void all_reduce(ReduceType* recv_buffer, int count,
  */
 template <typename ReduceType>
 void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
-                int count, op_type op_enum = op_type::sum,
-                std::shared_ptr<const communicator> comm = {},
-                std::shared_ptr<request> req = {});
+                int count, op_type op_enum,
+                std::shared_ptr<const communicator> comm)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
+        send_buffer, recv_buffer, count, reduce_type, operation, comm->get()));
+}
+
+
+/**
+ * Reduce data from all calling processes on the same communicator.
+ *
+ * @param send_buffer the data to reduce
+ * @param recv_buffer the reduced result
+ * @param count the number of elements to reduce
+ * @param op_enum the reduce operation. See @op_type
+ * @param comm the communicator
+ * @param req the request handle
+ */
+template <typename ReduceType>
+void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
+                int count, op_type op_enum,
+                std::shared_ptr<const communicator> comm,
+                std::shared_ptr<request> req)
+{
+    auto operation = helpers::get_operation<ReduceType>(op_enum);
+    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count,
+                                            reduce_type, operation,
+                                            comm->get(), req->get()));
+}
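When every rank needs the combined value, all_reduce does the same work in a single collective; the in-place variant overwrites the input buffer, as exercised by the CanAllReduceValuesInPlace test in mpi/test/base/bindings.cpp:

    int data = comm->rank() + 1;
    // After the call every rank holds the sum over all ranks.
    gko::mpi::all_reduce(&data, 1, gko::mpi::op_type::sum, comm);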


 /**
@@ -676,7 +887,14 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
 template <typename SendType, typename RecvType>
 void gather(const SendType* send_buffer, const int send_count,
             RecvType* recv_buffer, const int recv_count, int root_rank,
-            std::shared_ptr<const communicator> comm = {});
+            std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    bindings::gather(send_buffer, send_count, send_type, recv_buffer,
+                     recv_count, recv_type, root_rank,
+                     comm ? comm->get() : communicator::get_comm_world());
+}


 /**
@@ -695,7 +913,14 @@
 template <typename SendType, typename RecvType>
 void gather(const SendType* send_buffer, const int send_count,
             RecvType* recv_buffer, const int* recv_counts,
             const int* displacements, int root_rank,
-            std::shared_ptr<const communicator> comm = {});
+            std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    bindings::gatherv(send_buffer, send_count, send_type, recv_buffer,
+                      recv_counts, displacements, recv_type, root_rank,
+                      comm ? comm->get() : communicator::get_comm_world());
+}


 /**
@@ -710,7 +935,14 @@
 template <typename SendType, typename RecvType>
 void all_gather(const SendType* send_buffer, const int send_count,
                 RecvType* recv_buffer, const int recv_count,
-                std::shared_ptr<const communicator> comm = {});
+                std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    bindings::all_gather(send_buffer, send_count, send_type, recv_buffer,
+                         recv_count, recv_type,
+                         comm ? comm->get() : communicator::get_comm_world());
+}


 /**
@@ -725,7 +957,14 @@
 template <typename SendType, typename RecvType>
 void scatter(const SendType* send_buffer, const int send_count,
              RecvType* recv_buffer, const int recv_count, int root_rank,
-             std::shared_ptr<const communicator> comm = {});
+             std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    bindings::scatter(send_buffer, send_count, send_type, recv_buffer,
+                      recv_count, recv_type, root_rank,
+                      comm ? comm->get() : communicator::get_comm_world());
+}


 /**
@@ -742,7 +981,14 @@
 template <typename SendType, typename RecvType>
 void scatter(const SendType* send_buffer, const int* send_counts,
              const int* displacements, RecvType* recv_buffer,
             const int recv_count, int root_rank,
-             std::shared_ptr<const communicator> comm = {});
+             std::shared_ptr<const communicator> comm)
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    bindings::scatterv(send_buffer, send_counts, displacements, send_type,
+                       recv_buffer, recv_count, recv_type, root_rank,
+                       comm ? comm->get() : communicator::get_comm_world());
+}


 /**
@@ -758,9 +1004,23 @@
  * are the same.
  */
 template <typename RecvType>
-void all_to_all(RecvType* buffer, const int recv_count,
+void all_to_all(RecvType* recv_buffer, const int recv_count,
                 std::shared_ptr<const communicator> comm = {},
-                std::shared_ptr<request> req = {});
+                std::shared_ptr<request> req = {})
+{
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    if (!req.get()) {
+        bindings::all_to_all(
+            bindings::in_place<RecvType>(), recv_count, recv_type,
+            recv_buffer, recv_count, recv_type,
+            comm ? comm->get() : communicator::get_comm_world());
+    } else {
+        bindings::i_all_to_all(
+            bindings::in_place<RecvType>(), recv_count, recv_type,
+            recv_buffer, recv_count, recv_type,
+            comm ? comm->get() : communicator::get_comm_world(), req->get());
+    }
+}


 /**
@@ -776,9 +1036,24 @@
  */
 template <typename SendType, typename RecvType>
 void all_to_all(const SendType* send_buffer, const int send_count,
-                RecvType* recv_buffer, const int recv_count = {},
+                RecvType* recv_buffer, const int recv_count = 0,
                 std::shared_ptr<const communicator> comm = {},
-                std::shared_ptr<request> req = {});
+                std::shared_ptr<request> req = {})
+{
+    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    if (!req.get()) {
+        bindings::all_to_all(
+            send_buffer, send_count, send_type, recv_buffer,
+            recv_count == 0 ? send_count : recv_count, recv_type,
+            comm ?
comm->get() : communicator::get_comm_world(), req->get()); + } +} /** @@ -799,9 +1074,24 @@ template void all_to_all(const SendType* send_buffer, const int* send_counts, const int* send_offsets, RecvType* recv_buffer, const int* recv_counts, const int* recv_offsets, - const int stride = 1, - std::shared_ptr comm = {}, - std::shared_ptr req = {}); + const int stride, std::shared_ptr comm = {}, + std::shared_ptr req = {}) +{ + auto send_type = helpers::get_mpi_type(send_buffer[0]); + auto recv_type = helpers::get_mpi_type(recv_buffer[0]); + + if (!req.get()) { + bindings::all_to_all_v( + send_buffer, send_counts, send_offsets, send_type, recv_buffer, + recv_counts, recv_offsets, recv_type, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_all_to_all_v( + send_buffer, send_counts, send_offsets, send_type, recv_buffer, + recv_counts, recv_offsets, recv_type, + comm ? comm->get() : communicator::get_comm_world(), req->get()); + } +} /** @@ -815,11 +1105,23 @@ void all_to_all(const SendType* send_buffer, const int* send_counts, * @param comm the communicator * @param req the request handle */ -template -void scan(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, +template +void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, op_type op_enum = op_type::sum, std::shared_ptr comm = {}, - std::shared_ptr req = {}); + std::shared_ptr req = {}) +{ + auto operation = helpers::get_operation(op_enum); + auto scan_type = helpers::get_mpi_type(recv_buffer[0]); + if (!req.get()) { + bindings::scan(send_buffer, recv_buffer, count, scan_type, operation, + comm ? comm->get() : communicator::get_comm_world()); + } else { + bindings::i_scan(send_buffer, recv_buffer, count, scan_type, operation, + comm ? comm->get() : communicator::get_comm_world(), + req->get()); + } +} } // namespace mpi diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp index 14a7ecf005d..e4313862838 100644 --- a/mpi/base/bindings.cpp +++ b/mpi/base/bindings.cpp @@ -50,314 +50,6 @@ namespace gko { namespace mpi { -MPI_Op create_operation( - const std::function func, - void* arg1, void* arg2, int* len, MPI_Datatype* type) -{ - MPI_Op operation; - bindings::create_op(func.target(), - true, &operation); - return operation; -} - - -template -void send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - std::shared_ptr req, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - if (!req.get()) { - bindings::send(send_buffer, send_count, send_type, destination_rank, - send_tag, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_send( - send_buffer, send_count, send_type, destination_rank, send_tag, - comm ? comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, - const int recv_tag, std::shared_ptr req, - std::shared_ptr status, - std::shared_ptr comm) -{ - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::recv(recv_buffer, recv_count, recv_type, source_rank, - recv_tag, - comm ? comm->get() : communicator::get_comm_world(), - MPI_STATUS_IGNORE); - } else { - bindings::i_recv( - recv_buffer, recv_count, recv_type, source_rank, recv_tag, - comm ? 
comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window, - std::shared_ptr req) -{ - auto put_type = helpers::get_mpi_type(origin_buffer[0]); - if (!req.get()) { - bindings::put(origin_buffer, origin_count, put_type, target_rank, - target_disp, target_count, put_type, window.get()); - } else { - bindings::req_put(origin_buffer, origin_count, put_type, target_rank, - target_disp, target_count, put_type, window.get(), - req->get()); - } -} - - -template -void get(GetType* origin_buffer, const int origin_count, const int target_rank, - const unsigned int target_disp, const int target_count, - window& window, std::shared_ptr req) -{ - auto get_type = helpers::get_mpi_type(origin_buffer[0]); - if (!req.get()) { - bindings::get(origin_buffer, origin_count, get_type, target_rank, - target_disp, target_count, get_type, window.get()); - } else { - bindings::req_get(origin_buffer, origin_count, get_type, target_rank, - target_disp, target_count, get_type, window.get(), - req->get()); - } -} - - -template -void broadcast(BroadcastType* buffer, int count, int root_rank, - std::shared_ptr comm) -{ - auto bcast_type = helpers::get_mpi_type(buffer[0]); - bindings::broadcast(buffer, count, bcast_type, root_rank, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - op_type op_enum, int root_rank, - std::shared_ptr comm, - std::shared_ptr req) -{ - auto operation = helpers::get_operation(op_enum); - auto reduce_type = helpers::get_mpi_type(send_buffer[0]); - if (!req.get()) { - bindings::reduce(send_buffer, recv_buffer, count, reduce_type, - operation, root_rank, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_reduce( - send_buffer, recv_buffer, count, reduce_type, operation, root_rank, - comm ? comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, - std::shared_ptr comm, - std::shared_ptr req) -{ - auto operation = helpers::get_operation(op_enum); - auto reduce_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::all_reduce( - bindings::in_place(), recv_buffer, count, reduce_type, - operation, comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_all_reduce( - bindings::in_place(), recv_buffer, count, reduce_type, - operation, comm ? comm->get() : communicator::get_comm_world(), - req->get()); - } -} - - -template -void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, op_type op_enum, - std::shared_ptr comm, - std::shared_ptr req) -{ - auto operation = helpers::get_operation(op_enum); - auto reduce_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::all_reduce( - send_buffer, recv_buffer, count, reduce_type, operation, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_all_reduce( - send_buffer, recv_buffer, count, reduce_type, operation, - comm ? 
comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::gather(send_buffer, send_count, send_type, recv_buffer, - recv_count, recv_type, root_rank, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::gatherv(send_buffer, send_count, send_type, recv_buffer, - recv_counts, displacements, recv_type, root_rank, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void all_gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::all_gather(send_buffer, send_count, send_type, recv_buffer, - recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void scatter(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::scatter(send_buffer, send_count, send_type, recv_buffer, - recv_count, recv_type, root_rank, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void scatter(const SendType* send_buffer, const int* send_counts, - const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank, - std::shared_ptr comm) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - bindings::scatterv(send_buffer, send_counts, displacements, send_type, - recv_buffer, recv_count, recv_type, root_rank, - comm ? comm->get() : communicator::get_comm_world()); -} - - -template -void all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) -{ - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::all_to_all( - bindings::in_place(), recv_count, recv_type, recv_buffer, - recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_all_to_all( - bindings::in_place(), recv_count, recv_type, recv_buffer, - recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::all_to_all( - send_buffer, send_count, send_type, recv_buffer, - recv_count == 0 ? send_count : recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_all_to_all( - send_buffer, send_count, send_type, recv_buffer, - recv_count == 0 ? 
send_count : recv_count, recv_type, - comm ? comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void all_to_all(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const int stride, std::shared_ptr comm, - std::shared_ptr req) -{ - auto send_type = helpers::get_mpi_type(send_buffer[0]); - auto recv_type = helpers::get_mpi_type(recv_buffer[0]); - - // auto new_type = mpi_type(stride, send_type); - - if (!req.get()) { - bindings::all_to_all_v( - send_buffer, send_counts, send_offsets, send_type, recv_buffer, - recv_counts, recv_offsets, recv_type, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_all_to_all_v( - send_buffer, send_counts, send_offsets, send_type, recv_buffer, - recv_counts, recv_offsets, recv_type, - comm ? comm->get() : communicator::get_comm_world(), req->get()); - } -} - - -template -void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm, - std::shared_ptr req) -{ - auto operation = helpers::get_operation(op_enum); - auto scan_type = helpers::get_mpi_type(recv_buffer[0]); - if (!req.get()) { - bindings::scan(send_buffer, recv_buffer, count, scan_type, operation, - comm ? comm->get() : communicator::get_comm_world()); - } else { - bindings::i_scan(send_buffer, recv_buffer, count, scan_type, operation, - comm ? comm->get() : communicator::get_comm_world(), - req->get()); - } -} - - #define GKO_DECLARE_WINDOW(ValueType) class window GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_WINDOW); diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp index 2f25abe1213..85ea71a87c4 100644 --- a/mpi/base/bindings.hpp +++ b/mpi/base/bindings.hpp @@ -58,150 +58,6 @@ namespace mpi { namespace bindings { -inline void create_op(MPI_User_function* func, int commute, MPI_Op* op) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Op_create(func, commute, op)); -} - - -template -inline const T* in_place() -{ - return reinterpret_cast(MPI_IN_PLACE); -} - -inline void send(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, const int destination_rank, - const int send_tag, const MPI_Comm comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, send_type, - destination_rank, send_tag, comm)); -} - - -inline void recv(void* recv_buffer, const int recv_count, - MPI_Datatype& recv_type, const int source_rank, - const int recv_tag, const MPI_Comm comm, MPI_Status* status) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv(recv_buffer, recv_count, recv_type, - source_rank, recv_tag, comm, status)); -} - - -inline void i_send(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, const int destination_rank, - const int send_tag, const MPI_Comm comm, - MPI_Request* request) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, send_type, - destination_rank, send_tag, comm, - request)); -} - - -inline void i_recv(void* recv_buffer, const int recv_count, - MPI_Datatype& recv_type, const int source_rank, - const int recv_tag, const MPI_Comm comm, - MPI_Request* request) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, recv_type, - source_rank, recv_tag, comm, request)); -} - - -inline void put(const void* origin_buffer, const int origin_count, - const MPI_Datatype& origin_type, const int target_rank, - const unsigned int target_disp, const int target_count, - const MPI_Datatype& target_type, MPI_Win window) -{ - 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Put(origin_buffer, origin_count, origin_type, - target_rank, target_disp, target_count, - target_type, window)); -} - - -inline void req_put(const void* origin_buffer, const int origin_count, - const MPI_Datatype& origin_type, const int target_rank, - const unsigned int target_disp, const int target_count, - const MPI_Datatype& target_type, MPI_Win window, - MPI_Request* request) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(origin_buffer, origin_count, origin_type, - target_rank, target_disp, target_count, - target_type, window, request)); -} - - -inline void get(void* origin_buffer, const int origin_count, - const MPI_Datatype& origin_type, const int target_rank, - const unsigned int target_disp, const int target_count, - const MPI_Datatype& target_type, MPI_Win window) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Get(origin_buffer, origin_count, origin_type, - target_rank, target_disp, target_count, - target_type, window)); -} - - -inline void req_get(void* origin_buffer, const int origin_count, - const MPI_Datatype& origin_type, const int target_rank, - const unsigned int target_disp, const int target_count, - const MPI_Datatype& target_type, MPI_Win window, - MPI_Request* request) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(origin_buffer, origin_count, origin_type, - target_rank, target_disp, target_count, - target_type, window, request)); -} - - -inline void broadcast(void* buffer, int count, MPI_Datatype& bcast_type, - int root_rank, const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Bcast(buffer, count, bcast_type, root_rank, comm)); -} - - -inline void reduce(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, int root_rank, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, - reduce_type, operation, root_rank, - comm)); -} - - -inline void all_reduce(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, - reduce_type, operation, comm)); -} - - -inline void i_reduce(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, int root_rank, - const MPI_Comm& comm, MPI_Request* requests) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count, - reduce_type, operation, root_rank, - comm, requests)); -} - - -inline void i_all_reduce(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, - const MPI_Comm& comm, MPI_Request* requests) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, - reduce_type, operation, comm, - requests)); -} - - inline void gather(const void* send_buffer, const int send_count, MPI_Datatype& send_type, void* recv_buffer, const int recv_count, MPI_Datatype& recv_type, int root, diff --git a/mpi/base/helpers.hpp b/mpi/base/helpers.hpp index d569a568760..f643ed73e0a 100644 --- a/mpi/base/helpers.hpp +++ b/mpi/base/helpers.hpp @@ -75,27 +75,8 @@ GKO_MPI_DATATYPE(long, MPI_LONG); GKO_MPI_DATATYPE(float, MPI_FLOAT); GKO_MPI_DATATYPE(double, MPI_DOUBLE); GKO_MPI_DATATYPE(long double, MPI_LONG_DOUBLE); -GKO_MPI_DATATYPE(std::complex, MPI_COMPLEX); -GKO_MPI_DATATYPE(std::complex, MPI_DOUBLE_COMPLEX); - - -namespace detail { -namespace operations { - -template -void custom(void* in, void* inout, int* size, MPI_Datatype*) -{ - auto l_in = reinterpret_cast(in); - auto l_inout = 
reinterpret_cast(inout); - ValueType sum = 0.0; - for (auto i = 0; i < *size; ++i) { - } - *l_inout = (*l_in); -} - - -} // namespace operations -} // namespace detail +GKO_MPI_DATATYPE(std::complex, MPI_C_COMPLEX); +GKO_MPI_DATATYPE(std::complex, MPI_C_DOUBLE_COMPLEX); template @@ -104,51 +85,30 @@ MPI_Op get_operation(gko::mpi::op_type op) switch (op) { case gko::mpi::op_type::sum: return MPI_SUM; - break; case gko::mpi::op_type::min: return MPI_MIN; - break; case gko::mpi::op_type::max: return MPI_MAX; - break; case gko::mpi::op_type::product: return MPI_PROD; - break; - case gko::mpi::op_type::custom: { - // TEMPLATE to create custom operations - MPI_Op op; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Op_create(detail::operations::custom, true, &op)); - return op; - break; - } case gko::mpi::op_type::logical_and: return MPI_LAND; - break; case gko::mpi::op_type::bitwise_and: return MPI_BAND; - break; case gko::mpi::op_type::logical_or: return MPI_LOR; - break; case gko::mpi::op_type::bitwise_or: return MPI_BOR; - break; case gko::mpi::op_type::logical_xor: return MPI_LXOR; - break; case gko::mpi::op_type::bitwise_xor: return MPI_BXOR; - break; case gko::mpi::op_type::max_val_and_loc: return MPI_MAXLOC; - break; case gko::mpi::op_type::min_val_and_loc: return MPI_MINLOC; - break; default: GKO_NOT_SUPPORTED(op); - break; } } diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp index 8cfaa6e207c..578d08bac90 100644 --- a/mpi/test/base/bindings.cpp +++ b/mpi/test/base/bindings.cpp @@ -32,7 +32,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -48,7 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include @@ -766,9 +764,9 @@ TEST_F(MpiBindings, CanScanValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, 0); - gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, 0); - gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, 0); + gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, nullptr); + gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, nullptr); + gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, nullptr); if (my_rank == 0) { EXPECT_EQ(sum, 3.0); EXPECT_EQ(max, 3.0); diff --git a/mpi/test/base/communicator.cpp b/mpi/test/base/communicator.cpp index 9fdd2e2a471..3b7b14059b3 100644 --- a/mpi/test/base/communicator.cpp +++ b/mpi/test/base/communicator.cpp @@ -40,13 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include -#include - - -#include "core/test/utils.hpp" namespace { diff --git a/mpi/test/base/exception_helpers.cpp b/mpi/test/base/exception_helpers.cpp index 5411c1bafb1..79b3dd600ba 100644 --- a/mpi/test/base/exception_helpers.cpp +++ b/mpi/test/base/exception_helpers.cpp @@ -41,7 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include
#include
-#include


namespace {

From 3a6d2df950e2fcdd9576f91a3111c5b0ee263a91 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Fri, 5 Nov 2021 10:07:42 +0100
Subject: [PATCH 19/59] Move all wrappers to headers

---
 include/ginkgo/core/base/mpi.hpp | 387 ++++++++++++++++++++-----------
 mpi/CMakeLists.txt               |   2 +-
 mpi/base/bindings.hpp            | 188 ---------------
 mpi/base/helpers.hpp             | 121 ----------
 mpi/test/base/bindings.cpp       |  56 +++--
 5 files changed, 287 insertions(+), 467 deletions(-)
 delete mode 100644 mpi/base/bindings.hpp
 delete mode 100644 mpi/base/helpers.hpp

diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index 55004c7109f..e7b6d5e3365 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -59,6 +59,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace mpi {

+
/*
 * This enum is used for selecting the operation type for functions that take
 * MPI_Op. For example the MPI_Reduce operations.
 */
@@ -92,6 +93,74 @@ enum class thread_type {
};


+namespace detail {
+
+#define GKO_MPI_DATATYPE(BaseType, MPIType)                                  \
+    inline MPI_Datatype get_mpi_type(const BaseType&) { return MPIType; }    \
+    static_assert(true,                                                      \
+                  "This assert is used to counter the false positive extra " \
+                  "semi-colon warnings")
+
+
+GKO_MPI_DATATYPE(bool, MPI_C_BOOL);
+GKO_MPI_DATATYPE(char, MPI_CHAR);
+GKO_MPI_DATATYPE(unsigned char, MPI_UNSIGNED_CHAR);
+GKO_MPI_DATATYPE(unsigned, MPI_UNSIGNED);
+GKO_MPI_DATATYPE(int, MPI_INT);
+GKO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG);
+GKO_MPI_DATATYPE(unsigned short, MPI_UNSIGNED_SHORT);
+GKO_MPI_DATATYPE(long, MPI_LONG);
+GKO_MPI_DATATYPE(float, MPI_FLOAT);
+GKO_MPI_DATATYPE(double, MPI_DOUBLE);
+GKO_MPI_DATATYPE(long double, MPI_LONG_DOUBLE);
+GKO_MPI_DATATYPE(std::complex<float>, MPI_C_COMPLEX);
+GKO_MPI_DATATYPE(std::complex<double>, MPI_C_DOUBLE_COMPLEX);
+
+
+template <typename ValueType>
+MPI_Op get_operation(gko::mpi::op_type op)
+{
+    switch (op) {
+    case gko::mpi::op_type::sum:
+        return MPI_SUM;
+    case gko::mpi::op_type::min:
+        return MPI_MIN;
+    case gko::mpi::op_type::max:
+        return MPI_MAX;
+    case gko::mpi::op_type::product:
+        return MPI_PROD;
+    case gko::mpi::op_type::logical_and:
+        return MPI_LAND;
+    case gko::mpi::op_type::bitwise_and:
+        return MPI_BAND;
+    case gko::mpi::op_type::logical_or:
+        return MPI_LOR;
+    case gko::mpi::op_type::bitwise_or:
+        return MPI_BOR;
+    case gko::mpi::op_type::logical_xor:
+        return MPI_LXOR;
+    case gko::mpi::op_type::bitwise_xor:
+        return MPI_BXOR;
+    case gko::mpi::op_type::max_val_and_loc:
+        return MPI_MAXLOC;
+    case gko::mpi::op_type::min_val_and_loc:
+        return MPI_MINLOC;
+    default:
+        GKO_NOT_SUPPORTED(op);
+    }
+}
+
+
+template <typename T>
+inline const T* in_place()
+{
+    return reinterpret_cast<const T*>(MPI_IN_PLACE);
+}
+
+
+}  // namespace detail
+
+
/*
 * Class that sets up and finalizes the MPI exactly once per program execution.
 * using the singleton pattern. This must be called before any of the MPI
@@ -528,16 +597,6 @@ class window {
    MPI_Win window_;
};

-namespace detail {
-
-template <typename T>
-inline const T* in_place()
-{
-    return reinterpret_cast<const T*>(MPI_IN_PLACE);
-}
-
-}  // namespace detail
-

/**
 * Send (Blocking) data from calling process to destination rank.
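 *
 * A minimal usage sketch (illustrative only, not part of this patch):
 * assuming MPI has been initialized and `comm` is a communicator wrapper
 * created elsewhere, rank 0 could hand four ints to rank 1 with tag 42:
 * @code
 * int values[4] = {1, 2, 3, 4};
 * gko::mpi::send(values, 4, 1, 42, comm);
 * @endcode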
@@ -553,7 +612,7 @@ void send(const SendType* send_buffer, const int send_count,
          const int destination_rank, const int send_tag,
          std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, send_type,
                                      destination_rank, send_tag,
                                      comm->get()));
}
@@ -575,7 +634,7 @@ void send(const SendType* send_buffer, const int send_count,
          std::shared_ptr<request> req,
          std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, send_type,
                                       destination_rank, send_tag,
                                       comm->get(),
@@ -594,10 +653,10 @@
 */
template <typename RecvType>
void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
-          const int recv_tag, std::shared_ptr<status> status,
-          std::shared_ptr<const communicator> comm)
+          const int recv_tag, std::shared_ptr<const communicator> comm,
+          std::shared_ptr<status> status = {})
{
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(
        MPI_Recv(recv_buffer, recv_count, recv_type, source_rank, recv_tag,
                 comm->get(), status ? status->get() : MPI_STATUS_IGNORE));
}
@@ -619,7 +678,7 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
          const int recv_tag, std::shared_ptr<request> req,
          std::shared_ptr<const communicator> comm)
{
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, recv_type,
                                       source_rank, recv_tag, comm->get(),
                                       req->get()));
}
@@ -641,7 +700,7 @@ void put(const PutType* origin_buffer, const int origin_count,
         const int target_rank, const unsigned int target_disp,
         const int target_count, window<PutType>& window)
{
-    auto put_type = helpers::get_mpi_type(origin_buffer[0]);
+    auto put_type = detail::get_mpi_type(origin_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Put(origin_buffer, origin_count, put_type,
                                     target_rank, target_disp, target_count,
                                     put_type, window.get()));
}
@@ -665,10 +724,10 @@ void put(const PutType* origin_buffer, const int origin_count,
         const int target_count, window<PutType>& window,
         std::shared_ptr<request> req)
{
-    auto put_type = helpers::get_mpi_type(origin_buffer[0]);
-    bindings::req_put(origin_buffer, origin_count, put_type, target_rank,
-                      target_disp, target_count, put_type, window.get(),
-                      req->get());
+    auto put_type = detail::get_mpi_type(origin_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(origin_buffer, origin_count, put_type,
+                                      target_rank, target_disp, target_count,
+                                      put_type, window.get(), req->get()));
}
@@ -687,7 +746,7 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank,
         const unsigned int target_disp, const int target_count,
         window<GetType>& window)
{
-    auto get_type = helpers::get_mpi_type(origin_buffer[0]);
+    auto get_type = detail::get_mpi_type(origin_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Get(origin_buffer, origin_count, get_type,
                                     target_rank, target_disp, target_count,
                                     get_type, window.get()));
}
@@ -710,7 +769,7 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank,
         const unsigned int target_disp, const int target_count,
         window<GetType>& window, std::shared_ptr<request> req)
{
-    auto get_type = helpers::get_mpi_type(origin_buffer[0]);
+    auto get_type = detail::get_mpi_type(origin_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(origin_buffer, origin_count, get_type,
                                      target_rank, target_disp, target_count,
                                      get_type, window.get(), req->get()));
}
@@ -729,8 +788,7 @@
template <typename BroadcastType>
void broadcast(BroadcastType* buffer, int count, int root_rank,
               std::shared_ptr<const communicator> comm)
{
-    auto bcast_type = helpers::get_mpi_type(buffer[0]);
-    bindings::broadcast(buffer, count, bcast_type, root_rank, );
+    auto bcast_type = detail::get_mpi_type(buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(
        MPI_Bcast(buffer, count, bcast_type, root_rank, comm->get()));
}
@@ -747,11 +805,11 @@
 */
template <typename ReduceType>
void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
-            op_type op_enum = op_type::sum, int root_rank = 0,
+            op_type op_enum, int root_rank,
            std::shared_ptr<const communicator> comm)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(send_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(send_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count,
                                        reduce_type, operation, root_rank,
                                        comm->get()));
}
@@ -770,12 +828,12 @@
 */
template <typename ReduceType>
void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count,
-            op_type op_enum = op_type::sum, int root_rank = 0,
+            op_type op_enum, int root_rank,
            std::shared_ptr<const communicator> comm,
            std::shared_ptr<request> req)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(send_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(send_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count,
                                         reduce_type, operation, root_rank,
                                         comm->get(), req->get()));
}
@@ -795,8 +853,8 @@ template <typename ReduceType>
void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum,
                std::shared_ptr<const communicator> comm)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(recv_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(detail::in_place<ReduceType>(),
                                           recv_buffer, count, reduce_type,
                                           operation, comm->get()));
}
@@ -818,8 +876,8 @@ void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum,
                std::shared_ptr<const communicator> comm,
                std::shared_ptr<request> req)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(recv_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(
        MPI_Iallreduce(detail::in_place<ReduceType>(), recv_buffer, count,
                       reduce_type, operation, comm->get(), req->get()));
}
@@ -842,8 +900,8 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
                int count, op_type op_enum,
                std::shared_ptr<const communicator> comm)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(recv_buffer[0]);
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
        send_buffer, recv_buffer, count, reduce_type, operation, comm->get()));
}
@@ -866,8 +924,8 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer,
                std::shared_ptr<const communicator> comm,
                std::shared_ptr<request> req)
{
-    auto operation = helpers::get_operation<ReduceType>(op_enum);
-    auto reduce_type = helpers::get_mpi_type(recv_buffer[0]);
+    auto operation = detail::get_operation<ReduceType>(op_enum);
+    auto reduce_type = detail::get_mpi_type(recv_buffer[0]);
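+    // start the non-blocking reduction; completion must be awaited via req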
    GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count,
                                            reduce_type, operation,
                                            comm->get(), req->get()));
}
@@ -889,11 +947,11 @@ void gather(const SendType* send_buffer, const int send_count,
            RecvType* recv_buffer, const int recv_count, int root_rank,
            std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    bindings::gather(send_buffer, send_count, send_type, recv_buffer,
-                     recv_count, recv_type, root_rank,
-                     comm ? comm->get() : communicator::get_comm_world());
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather(send_buffer, send_count, send_type,
+                                        recv_buffer, recv_count, recv_type,
+                                        root_rank, comm->get()));
}
@@ -910,16 +968,16 @@
 * @param comm the communicator
 */
template <typename SendType, typename RecvType>
-void gather(const SendType* send_buffer, const int send_count,
-            RecvType* recv_buffer, const int* recv_counts,
-            const int* displacements, int root_rank,
-            std::shared_ptr<const communicator> comm)
+void gatherv(const SendType* send_buffer, const int send_count,
+             RecvType* recv_buffer, const int* recv_counts,
+             const int* displacements, int root_rank,
+             std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    bindings::gatherv(send_buffer, send_count, send_type, recv_buffer,
-                      recv_counts, displacements, recv_type, root_rank,
-                      comm ? comm->get() : communicator::get_comm_world());
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv(
+        send_buffer, send_count, send_type, recv_buffer, recv_counts,
+        displacements, recv_type, root_rank, comm->get()));
}
@@ -937,11 +995,11 @@ void all_gather(const SendType* send_buffer, const int send_count,
                RecvType* recv_buffer, const int recv_count,
                std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    bindings::all_gather(send_buffer, send_count, send_type, recv_buffer,
-                         recv_count, recv_type,
-                         comm ? comm->get() : communicator::get_comm_world());
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(send_buffer, send_count, send_type,
+                                           recv_buffer, recv_count, recv_type,
+                                           comm->get()));
}
@@ -959,11 +1017,11 @@ void scatter(const SendType* send_buffer, const int send_count,
             RecvType* recv_buffer, const int recv_count, int root_rank,
             std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    bindings::scatter(send_buffer, send_count, send_type, recv_buffer,
-                      recv_count, recv_type, root_rank,
-                      comm ? comm->get() : communicator::get_comm_world());
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(send_buffer, send_count, send_type,
+                                         recv_buffer, recv_count, recv_type,
+                                         root_rank, comm->get()));
}
@@ -978,16 +1036,38 @@ void scatter(const SendType* send_buffer, const int send_count,
 * @param comm the communicator
 */
template <typename SendType, typename RecvType>
-void scatter(const SendType* send_buffer, const int* send_counts,
-             const int* displacements, RecvType* recv_buffer,
-             const int recv_count, int root_rank,
-             std::shared_ptr<const communicator> comm)
+void scatterv(const SendType* send_buffer, const int* send_counts,
+              const int* displacements, RecvType* recv_buffer,
+              const int recv_count, int root_rank,
+              std::shared_ptr<const communicator> comm)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    bindings::scatterv(send_buffer, send_counts, displacements, send_type,
-                       recv_buffer, recv_count, recv_type, root_rank,
-                       comm ? comm->get() : communicator::get_comm_world());
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv(
+        send_buffer, send_counts, displacements, send_type, recv_buffer,
+        recv_count, recv_type, root_rank, comm->get()));
+}
+
+
+/**
+ * Communicate data from all ranks to all other ranks in place (MPI_Alltoall).
+ * See MPI documentation for more details.
+ *
+ * @param recv_buffer the buffer to send from and to receive into
+ * @param recv_count the number of elements to receive
+ * @param comm the communicator
+ *
+ * @note This overload uses MPI_IN_PLACE, so the source and destination
+ *       buffers are the same.
+ */
+template <typename RecvType>
+void all_to_all(RecvType* recv_buffer, const int recv_count,
+                std::shared_ptr<const communicator> comm)
+{
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(detail::in_place<RecvType>(),
+                                          recv_count, recv_type, recv_buffer,
+                                          recv_count, recv_type, comm->get()));
}
@@ -1005,21 +1085,36 @@ void scatter(const SendType* send_buffer, const int* send_counts,
 */
template <typename RecvType>
void all_to_all(RecvType* recv_buffer, const int recv_count,
-                std::shared_ptr<const communicator> comm = {},
+                std::shared_ptr<const communicator> comm,
                std::shared_ptr<request> req)
{
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    if (!req.get()) {
-        bindings::all_to_all(
-            bindings::in_place<RecvType>(), recv_count, recv_type, recv_buffer,
-            recv_count, recv_type,
-            comm ? comm->get() : communicator::get_comm_world());
-    } else {
-        bindings::i_all_to_all(
-            bindings::in_place<RecvType>(), recv_count, recv_type, recv_buffer,
-            recv_count, recv_type,
-            comm ? comm->get() : communicator::get_comm_world(), req->get());
-    }
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
+        detail::in_place<RecvType>(), recv_count, recv_type, recv_buffer,
+        recv_count, recv_type, comm->get(), req->get()));
+}
+
+
+/**
+ * Communicate data from all ranks to all other ranks (MPI_Alltoall).
+ * See MPI documentation for more details.
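+ *
+ * A minimal usage sketch (illustrative only): assuming `comm` wraps a
+ * two-rank communicator, each rank exchanges one double with every rank:
+ * @code
+ * double send[2] = {1.0, 2.0};
+ * double recv[2] = {};
+ * gko::mpi::all_to_all(send, 1, recv, 1, comm);
+ * @endcode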
+ *
+ * @param send_buffer the buffer to send
+ * @param send_count the number of elements to send
+ * @param recv_buffer the buffer to receive
+ * @param recv_count the number of elements to receive
+ * @param comm the communicator
+ */
+template <typename SendType, typename RecvType>
+void all_to_all(const SendType* send_buffer, const int send_count,
+                RecvType* recv_buffer, const int recv_count,
+                std::shared_ptr<const communicator> comm)
+{
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(send_buffer, send_count, send_type,
+                                          recv_buffer, recv_count, recv_type,
+                                          comm->get()));
+}
@@ -1036,23 +1131,43 @@ void all_to_all(RecvType* recv_buffer, const int recv_count,
 */
template <typename SendType, typename RecvType>
void all_to_all(const SendType* send_buffer, const int send_count,
-                RecvType* recv_buffer, const int recv_count{},
-                std::shared_ptr<const communicator> comm = {},
+                RecvType* recv_buffer, const int recv_count,
+                std::shared_ptr<const communicator> comm,
                std::shared_ptr<request> req)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-    if (!req.get()) {
-        bindings::all_to_all(
-            send_buffer, send_count, send_type, recv_buffer,
-            recv_count == 0 ? send_count : recv_count, recv_type,
-            comm ? comm->get() : communicator::get_comm_world());
-    } else {
-        bindings::i_all_to_all(
-            send_buffer, send_count, send_type, recv_buffer,
-            recv_count == 0 ? send_count : recv_count, recv_type,
-            comm ? comm->get() : communicator::get_comm_world(), req->get());
-    }
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(send_buffer, send_count, send_type,
+                                           recv_buffer, recv_count, recv_type,
+                                           comm->get(), req->get()));
+}
+
+
+/**
+ * Communicate data from all ranks to all other ranks with
+ * offsets (MPI_Alltoallv). See MPI documentation for more details.
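+ *
+ * A minimal usage sketch (illustrative only): assuming `comm` wraps a
+ * two-rank communicator and every rank sends two doubles to each rank,
+ * with a stride of 1:
+ * @code
+ * double send[4] = {0.0, 1.0, 2.0, 3.0};
+ * double recv[4] = {};
+ * int counts[2] = {2, 2};
+ * int offsets[2] = {0, 2};
+ * gko::mpi::all_to_all_v(send, counts, offsets, recv, counts, offsets,
+ *                        1, comm);
+ * @endcode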
+ *
+ * @param send_buffer the buffer to send
+ * @param send_counts the number of elements to send to each rank
+ * @param send_offsets the offsets for the send buffer
+ * @param recv_buffer the buffer to gather into
+ * @param recv_counts the number of elements to receive from each rank
+ * @param recv_offsets the offsets for the recv buffer
+ * @param stride the stride to be used in case of sending concatenated data
+ * @param comm the communicator
+ */
+template <typename SendType, typename RecvType>
+void all_to_all_v(const SendType* send_buffer, const int* send_counts,
+                  const int* send_offsets, RecvType* recv_buffer,
+                  const int* recv_counts, const int* recv_offsets,
+                  const int stride, std::shared_ptr<const communicator> comm)
+{
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv(
+        send_buffer, send_counts, send_offsets, send_type, recv_buffer,
+        recv_counts, recv_offsets, recv_type, comm->get()));
+}
@@ -1071,26 +1186,18 @@
 * @param req the request handle
 */
template <typename SendType, typename RecvType>
-void all_to_all(const SendType* send_buffer, const int* send_counts,
-                const int* send_offsets, RecvType* recv_buffer,
-                const int* recv_counts, const int* recv_offsets,
-                const int stride, std::shared_ptr<const communicator> comm = {},
-                std::shared_ptr<request> req = {})
+void all_to_all_v(const SendType* send_buffer, const int* send_counts,
+                  const int* send_offsets, RecvType* recv_buffer,
+                  const int* recv_counts, const int* recv_offsets,
+                  const int stride, std::shared_ptr<const communicator> comm,
+                  std::shared_ptr<request> req)
{
-    auto send_type = helpers::get_mpi_type(send_buffer[0]);
-    auto recv_type = helpers::get_mpi_type(recv_buffer[0]);
-
-    if (!req.get()) {
-        bindings::all_to_all_v(
-            send_buffer, send_counts, send_offsets, send_type, recv_buffer,
-            recv_counts, recv_offsets, recv_type,
-            comm ? comm->get() : communicator::get_comm_world());
-    } else {
-        bindings::i_all_to_all_v(
-            send_buffer, send_counts, send_offsets, send_type, recv_buffer,
-            recv_counts, recv_offsets, recv_type,
-            comm ? comm->get() : communicator::get_comm_world(), req->get());
-    }
+    auto send_type = detail::get_mpi_type(send_buffer[0]);
+    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
+
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv(
+        send_buffer, send_counts, send_offsets, send_type, recv_buffer,
+        recv_counts, recv_offsets, recv_type, comm->get(), req->get()));
}
@@ -1107,20 +1214,36 @@
 */
template <typename ScanType>
void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count,
-          op_type op_enum = op_type::sum,
-          std::shared_ptr<const communicator> comm = {},
-          std::shared_ptr<request> req = {})
+          op_type op_enum, std::shared_ptr<const communicator> comm)
{
-    auto operation = helpers::get_operation<ScanType>(op_enum);
-    auto scan_type = helpers::get_mpi_type(recv_buffer[0]);
-    if (!req.get()) {
-        bindings::scan(send_buffer, recv_buffer, count, scan_type, operation,
-                       comm ? comm->get() : communicator::get_comm_world());
-    } else {
-        bindings::i_scan(send_buffer, recv_buffer, count, scan_type, operation,
-                         comm ? comm->get() : communicator::get_comm_world(),
-                         req->get());
-    }
+    auto operation = detail::get_operation<ScanType>(op_enum);
+    auto scan_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count,
+                                      scan_type, operation, comm->get()));
+}
+
+
+/**
+ * Does a scan operation with the given operator (MPI_Scan).
+ * See MPI documentation for more details.
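+ *
+ * A minimal usage sketch (illustrative only): an inclusive prefix sum over
+ * one int per rank, assuming `comm`, a request handle `req` and the rank
+ * index `my_rank` are set up elsewhere:
+ * @code
+ * int local = my_rank;
+ * int prefix = 0;
+ * gko::mpi::scan(&local, &prefix, 1, gko::mpi::op_type::sum, comm, req);
+ * gko::mpi::wait(req);  // prefix now holds the inclusive sum up to my_rank
+ * @endcode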
+ *
+ * @param send_buffer the buffer to scan from
+ * @param recv_buffer the result buffer
+ * @param count the number of elements to scan
+ * @param op_enum the operation type to be used for the scan. See @op_type
+ * @param comm the communicator
+ * @param req the request handle
+ */
+template <typename ScanType>
+void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count,
+          op_type op_enum, std::shared_ptr<const communicator> comm,
+          std::shared_ptr<request> req)
+{
+    auto operation = detail::get_operation<ScanType>(op_enum);
+    auto scan_type = detail::get_mpi_type(recv_buffer[0]);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count,
+                                       scan_type, operation, comm->get(),
+                                       req->get()));
+}

diff --git a/mpi/CMakeLists.txt b/mpi/CMakeLists.txt
index febad2f1b7e..7f63bf60863 100644
--- a/mpi/CMakeLists.txt
+++ b/mpi/CMakeLists.txt
@@ -4,7 +4,7 @@ add_library(ginkgo_mpi)
target_sources(ginkgo_mpi
    PRIVATE
    base/exception.cpp
-    base/bindings.cpp
+    # base/bindings.cpp
    base/version.cpp
    )

diff --git a/mpi/base/bindings.hpp b/mpi/base/bindings.hpp
deleted file mode 100644
index 85ea71a87c4..00000000000
--- a/mpi/base/bindings.hpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*************************************************************
-Copyright (c) 2017-2021, the Ginkgo authors
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holder nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*************************************************************/
-
-#ifndef GKO_MPI_BINDINGS_HPP_
-#define GKO_MPI_BINDINGS_HPP_
-
-
-#include
-
-
-#include
-
-
-#include
-
-
-namespace gko {
-/**
- * @brief The MPI namespace.
- *
- * @ingroup mpi
- */
-namespace mpi {
-/**
- * @brief The bindings namespace.
- * - * @ingroup bindings - */ -namespace bindings { - - -inline void gather(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int recv_count, MPI_Datatype& recv_type, int root, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - root, comm)); -} - - -inline void gatherv(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int* recv_counts, const int* displacements, - MPI_Datatype& recv_type, int root_rank, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Gatherv(send_buffer, send_count, send_type, recv_buffer, - recv_counts, displacements, recv_type, root_rank, comm)); -} - - -inline void all_gather(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int recv_count, MPI_Datatype& recv_type, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm)); -} - - -inline void scatter(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int recv_count, MPI_Datatype& recv_type, int root, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - root, comm)); -} - - -inline void scatterv(const void* send_buffer, const int* send_counts, - const int* displacements, MPI_Datatype& send_type, - void* recv_buffer, const int recv_count, - MPI_Datatype& recv_type, int root_rank, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Scatterv(send_buffer, send_counts, displacements, send_type, - recv_buffer, recv_count, recv_type, root_rank, comm)); -} - - -inline void all_to_all(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int recv_count, MPI_Datatype& recv_type, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm)); -} - - -inline void i_all_to_all(const void* send_buffer, const int send_count, - MPI_Datatype& send_type, void* recv_buffer, - const int recv_count, MPI_Datatype& recv_type, - const MPI_Comm& comm, MPI_Request* requests) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm, requests)); -} - - -inline void all_to_all_v(const void* send_buffer, const int* send_count, - const int* send_offsets, const MPI_Datatype& send_type, - void* recv_buffer, const int* recv_count, - const int* recv_offsets, const MPI_Datatype& recv_type, - const MPI_Comm& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Alltoallv(send_buffer, send_count, send_offsets, send_type, - recv_buffer, recv_count, recv_offsets, recv_type, comm)); -} - - -inline void i_all_to_all_v(const void* send_buffer, const int* send_count, - const int* send_offsets, - const MPI_Datatype& send_type, void* recv_buffer, - const int* recv_count, const int* recv_offsets, - const MPI_Datatype& recv_type, const MPI_Comm& comm, - MPI_Request* requests) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( - send_buffer, send_count, send_offsets, send_type, recv_buffer, - recv_count, recv_offsets, recv_type, comm, requests)); -} - - -inline void scan(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, - const MPI_Comm& comm) -{ - 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, - reduce_type, operation, comm)); -} - - -inline void i_scan(const void* send_buffer, void* recv_buffer, int count, - MPI_Datatype& reduce_type, MPI_Op operation, - const MPI_Comm& comm, MPI_Request* requests) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, - reduce_type, operation, comm, requests)); -} - - -} // namespace bindings -} // namespace mpi -} // namespace gko - - -#endif // GKO_MPI_BINDINGS_HPP_ diff --git a/mpi/base/helpers.hpp b/mpi/base/helpers.hpp deleted file mode 100644 index f643ed73e0a..00000000000 --- a/mpi/base/helpers.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_MPI_HELPERS_HPP_ -#define GKO_MPI_HELPERS_HPP_ - - -#include - - -#include - - -#include - - -namespace gko { -/** - * @brief The MPI namespace. - * - * @ingroup mpi - */ -namespace mpi { -/** - * @brief The helpers namespace. 
- * - * @ingroup helper - */ -namespace helpers { - -#define GKO_MPI_DATATYPE(BaseType, MPIType) \ - inline MPI_Datatype get_mpi_type(const BaseType&) { return MPIType; } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - - -GKO_MPI_DATATYPE(bool, MPI_C_BOOL); -GKO_MPI_DATATYPE(char, MPI_CHAR); -GKO_MPI_DATATYPE(unsigned char, MPI_UNSIGNED_CHAR); -GKO_MPI_DATATYPE(unsigned, MPI_UNSIGNED); -GKO_MPI_DATATYPE(int, MPI_INT); -GKO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG); -GKO_MPI_DATATYPE(unsigned short, MPI_UNSIGNED_SHORT); -GKO_MPI_DATATYPE(long, MPI_LONG); -GKO_MPI_DATATYPE(float, MPI_FLOAT); -GKO_MPI_DATATYPE(double, MPI_DOUBLE); -GKO_MPI_DATATYPE(long double, MPI_LONG_DOUBLE); -GKO_MPI_DATATYPE(std::complex, MPI_C_COMPLEX); -GKO_MPI_DATATYPE(std::complex, MPI_C_DOUBLE_COMPLEX); - - -template -MPI_Op get_operation(gko::mpi::op_type op) -{ - switch (op) { - case gko::mpi::op_type::sum: - return MPI_SUM; - case gko::mpi::op_type::min: - return MPI_MIN; - case gko::mpi::op_type::max: - return MPI_MAX; - case gko::mpi::op_type::product: - return MPI_PROD; - case gko::mpi::op_type::logical_and: - return MPI_LAND; - case gko::mpi::op_type::bitwise_and: - return MPI_BAND; - case gko::mpi::op_type::logical_or: - return MPI_LOR; - case gko::mpi::op_type::bitwise_or: - return MPI_BOR; - case gko::mpi::op_type::logical_xor: - return MPI_LXOR; - case gko::mpi::op_type::bitwise_xor: - return MPI_BXOR; - case gko::mpi::op_type::max_val_and_loc: - return MPI_MAXLOC; - case gko::mpi::op_type::min_val_and_loc: - return MPI_MINLOC; - default: - GKO_NOT_SUPPORTED(op); - } -} - - -} // namespace helpers -} // namespace mpi -} // namespace gko - - -#endif // GKO_MPI_HELPERS_HPP_ diff --git a/mpi/test/base/bindings.cpp b/mpi/test/base/bindings.cpp index 578d08bac90..0c3f645a0c2 100644 --- a/mpi/test/base/bindings.cpp +++ b/mpi/test/base/bindings.cpp @@ -105,12 +105,13 @@ TEST_F(MpiBindings, CanSendAndRecvValues) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { gko::mpi::send(send_array.get_const_data(), 4, rank, - 40 + rank); + 40 + rank, comm); } } } else { recv_array = gko::Array{ref, 4}; - gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank); + gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, + comm); } if (my_rank != 0) { ASSERT_EQ(recv_array.get_data()[0], 1); @@ -138,13 +139,13 @@ TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { gko::mpi::send(send_array.get_data(), 4, rank, - 40 + rank, req); + 40 + rank, req, comm); } } } else { recv_array = gko::Array{ref, 4}; gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, - req); + req, comm); } gko::mpi::wait(req); if (my_rank != 0) { @@ -374,7 +375,7 @@ TEST_F(MpiBindings, CanBroadcastValues) // clang-format on array = gko::Array{gko::Array::view(ref, 8, data)}; } - gko::mpi::broadcast(array.get_data(), 8, 0); + gko::mpi::broadcast(array.get_data(), 8, 0, comm); auto comp_data = array.get_data(); ASSERT_EQ(comp_data[0], 2.0); ASSERT_EQ(comp_data[1], 3.0); @@ -406,9 +407,12 @@ TEST_F(MpiBindings, CanReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::reduce(&data, &sum, 1, gko::mpi::op_type::sum, 0); - gko::mpi::reduce(&data, &max, 1, gko::mpi::op_type::max, 0); - gko::mpi::reduce(&data, &min, 1, gko::mpi::op_type::min, 0); + gko::mpi::reduce(&data, &sum, 1, gko::mpi::op_type::sum, 0, + comm); + gko::mpi::reduce(&data, &max, 1, gko::mpi::op_type::max, 0, 
+ comm); + gko::mpi::reduce(&data, &min, 1, gko::mpi::op_type::min, 0, + comm); if (my_rank == 0) { EXPECT_EQ(sum, 16.0); EXPECT_EQ(max, 6.0); @@ -432,7 +436,7 @@ TEST_F(MpiBindings, CanAllReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, &sum, 1, gko::mpi::op_type::sum); + gko::mpi::all_reduce(&data, &sum, 1, gko::mpi::op_type::sum, comm); ASSERT_EQ(sum, 16); } @@ -452,7 +456,7 @@ TEST_F(MpiBindings, CanAllReduceValuesInPlace) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, 1, gko::mpi::op_type::sum); + gko::mpi::all_reduce(&data, 1, gko::mpi::op_type::sum, comm); ASSERT_EQ(data, 16); } @@ -474,7 +478,8 @@ TEST_F(MpiBindings, CanScatterValues) } auto scatter_into_array = gko::Array{ref, 2}; gko::mpi::scatter(scatter_from_array.get_data(), 2, - scatter_into_array.get_data(), 2, 0); + scatter_into_array.get_data(), 2, 0, + comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], 2.0); @@ -510,7 +515,7 @@ TEST_F(MpiBindings, CanGatherValues) } auto gather_array = gko::Array{ref, static_cast(num_ranks)}; - gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0); + gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0, comm); if (my_rank == 0) { ASSERT_EQ(gather_array.get_data()[0], 3); ASSERT_EQ(gather_array.get_data()[1], 5); @@ -551,10 +556,11 @@ TEST_F(MpiBindings, CanScatterValuesWithDisplacements) } scatter_into_array = gko::Array{ref, static_cast(nelems)}; - gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0); - gko::mpi::scatter( + gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); + gko::mpi::scatterv( scatter_from_array.get_data(), s_counts.get_data(), - displacements.get_data(), scatter_into_array.get_data(), nelems, 0); + displacements.get_data(), scatter_into_array.get_data(), nelems, 0, + comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], 2.0); @@ -615,10 +621,10 @@ TEST_F(MpiBindings, CanGatherValuesWithDisplacements) gko::Array::view(ref->get_master(), 3, data)}; } - gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0); - gko::mpi::gather( + gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm); + gko::mpi::gatherv( gather_from_array.get_data(), nelems, gather_into_array.get_data(), - r_counts.get_data(), displacements.get_data(), 0); + r_counts.get_data(), displacements.get_data(), 0, comm); auto comp_data = gather_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], 2.0); @@ -662,7 +668,7 @@ TEST_F(MpiBindings, AllToAllWorksCorrectly) } gko::mpi::all_to_all(send_array.get_data(), 1, - recv_array.get_data()); + recv_array.get_data(), 1, comm); this->assert_equal_arrays(recv_array, ref_array); } @@ -689,7 +695,7 @@ TEST_F(MpiBindings, AllToAllInPlaceWorksCorrectly) ref_array = gko::Array(ref, {2.0, 2.0, 0.0, -2.0}); } - gko::mpi::all_to_all(recv_array.get_data(), 1); + gko::mpi::all_to_all(recv_array.get_data(), 1, comm); this->assert_equal_arrays(recv_array, ref_array); } @@ -740,10 +746,10 @@ TEST_F(MpiBindings, AllToAllVWorksCorrectly) ref_array = gko::Array{ref, {0.0, 2.5, 3.5, 3.0}}; } - gko::mpi::all_to_all( + gko::mpi::all_to_all_v( send_array.get_data(), scounts_array.get_data(), soffset_array.get_data(), recv_array.get_data(), - rcounts_array.get_data(), roffset_array.get_data()); + rcounts_array.get_data(), roffset_array.get_data(), {}, comm); this->assert_equal_arrays(recv_array, ref_array); } @@ -764,9 +770,9 @@ TEST_F(MpiBindings, CanScanValues) } else if (my_rank == 3) { data = 
6; } - gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, nullptr); - gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, nullptr); - gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, nullptr); + gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, comm); + gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, comm); + gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, comm); if (my_rank == 0) { EXPECT_EQ(sum, 3.0); EXPECT_EQ(max, 3.0); From 25d5e6264aa25f751354ce88179535935a21ffe9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 5 Nov 2021 14:11:00 +0100 Subject: [PATCH 20/59] Update init_finalize --- .../mpi-simple-solver/mpi-simple-solver.cpp | 4 +--- include/ginkgo/core/base/mpi.hpp | 17 +++++------------ 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/examples/mpi-simple-solver/mpi-simple-solver.cpp b/examples/mpi-simple-solver/mpi-simple-solver.cpp index 86d7b2f4aa1..60ae981a450 100644 --- a/examples/mpi-simple-solver/mpi-simple-solver.cpp +++ b/examples/mpi-simple-solver/mpi-simple-solver.cpp @@ -47,10 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int main(int argc, char* argv[]) { - auto mpi_init_fin = gko::mpi::init_finalize::get_instance(argc, argv); + auto mpi_init_fin = gko::mpi::init_finalize(argc, argv); { - auto mpi_init_fin2 = gko::mpi::init_finalize::get_instance(argc, argv); - // Use some shortcuts. In Ginkgo, vectors are seen as a // gko::matrix::Dense with one column/one row. The advantage of this // concept is that using multiple vectors is a now a natural extension diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index e7b6d5e3365..26a37156086 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -168,14 +168,6 @@ inline const T* in_place() */ class init_finalize { public: - static init_finalize* get_instance( - int& argc, char**& argv, - const thread_type thread_t = thread_type::serialized) - { - static init_finalize instance(argc, argv, thread_t); - return &instance; - } - static bool is_finalized() { int flag = 0; @@ -190,8 +182,8 @@ class init_finalize { return flag; } -private: - init_finalize(int& argc, char**& argv, const thread_type thread_t) + init_finalize(int& argc, char**& argv, + const thread_type thread_t = thread_type::serialized) { auto flag = is_initialized(); if (!flag) { @@ -200,7 +192,7 @@ class init_finalize { MPI_Init_thread(&argc, &argv, this->required_thread_support_, &(this->provided_thread_support_))); } else { - // GKO_MPI_INITIALIZED; + GKO_MPI_INITIALIZED; } } @@ -212,6 +204,7 @@ class init_finalize { if (!flag) MPI_Finalize(); } +private: int num_args_; int required_thread_support_; int provided_thread_support_; @@ -466,7 +459,7 @@ double get_walltime() { return MPI_Wtime(); } * * @param comm the communicator */ -void synchronize(const communicator& comm = communicator::get_comm_world()) +void synchronize(const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm.get())); } From 8544262815ea123bd3da591a35576f5bf90adc9a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 5 Nov 2021 17:02:27 +0100 Subject: [PATCH 21/59] Remove ginkgo_mpi library, move to core --- CMakeLists.txt | 3 - cmake/get_info.cmake | 2 +- core/CMakeLists.txt | 22 +- {mpi/base => core/mpi}/exception.cpp | 0 {mpi => core/mpi}/get_info.cmake | 0 core/test/CMakeLists.txt | 3 + {mpi/test => core/test/mpi}/CMakeLists.txt | 0 .../test/mpi}/base/CMakeLists.txt | 0 {mpi/test => core/test/mpi}/base/bindings.cpp | 16 +- 
.../test/mpi}/base/communicator.cpp | 4 +- .../test/mpi}/base/exception_helpers.cpp | 4 +- .../test/mpi}/cuda-aware-mpi-test.cu | 0 .../test/mpi}/gtest-mpi-listener.hpp | 0 .../test => core/test/mpi}/gtest-mpi-main.hpp | 0 .../mpi-simple-solver/mpi-simple-solver.cpp | 1 - include/ginkgo/core/base/mpi.hpp | 4 +- include/ginkgo/core/base/version.hpp | 18 -- mpi/CMakeLists.txt | 31 --- mpi/base/bindings.cpp | 207 ------------------ mpi/base/version.cpp | 48 ---- 20 files changed, 31 insertions(+), 332 deletions(-) rename {mpi/base => core/mpi}/exception.cpp (100%) rename {mpi => core/mpi}/get_info.cmake (100%) rename {mpi/test => core/test/mpi}/CMakeLists.txt (100%) rename {mpi/test => core/test/mpi}/base/CMakeLists.txt (100%) rename {mpi/test => core/test/mpi}/base/bindings.cpp (98%) rename {mpi/test => core/test/mpi}/base/communicator.cpp (98%) rename {mpi/test => core/test/mpi}/base/exception_helpers.cpp (96%) rename {mpi/test => core/test/mpi}/cuda-aware-mpi-test.cu (100%) rename {mpi/test => core/test/mpi}/gtest-mpi-listener.hpp (100%) rename {mpi/test => core/test/mpi}/gtest-mpi-main.hpp (100%) delete mode 100644 mpi/CMakeLists.txt delete mode 100644 mpi/base/bindings.cpp delete mode 100644 mpi/base/version.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f8295bd0adc..5fa791192c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -289,9 +289,6 @@ endif() if (GINKGO_BUILD_OMP) add_subdirectory(omp) # High-performance omp kernels endif() -if(GINKGO_BUILD_MPI) - add_subdirectory(mpi) # The MPI module -endif() add_subdirectory(core) # Core Ginkgo types and top-level functions add_subdirectory(include) # Public API self-contained check if (GINKGO_BUILD_TESTS) diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index ec25986f150..479b889aeaf 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -156,7 +156,7 @@ IF(GINKGO_BUILD_OMP) ENDIF() IF(GINKGO_BUILD_MPI) - include(mpi/get_info.cmake) + include(core/mpi/get_info.cmake) ENDIF() IF(GINKGO_BUILD_CUDA) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 17e6b7055ad..a0e62f31ccd 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -61,8 +61,8 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if (GINKGO_BUILD_MPI) - target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX) - target_include_directories(ginkgo SYSTEM PUBLIC ${MPI_INCLUDE_PATH}) + target_sources(ginkgo + PRIVATE mpi/exception.cpp) endif() ginkgo_compile_features(ginkgo) @@ -71,13 +71,8 @@ target_compile_options(ginkgo PRIVATE "${GINKGO_COMPILER_FLAGS}") # add a namespace alias so Ginkgo can always be included as Ginkgo::ginkgo # regardless of whether it is installed or added as a subdirectory add_library(Ginkgo::ginkgo ALIAS ginkgo) -if(GINKGO_BUILD_MPI) - target_link_libraries(ginkgo - PUBLIC ginkgo_device ginkgo_omp ginkgo_mpi ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) -else() - target_link_libraries(ginkgo - PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) -endif() +target_link_libraries(ginkgo + PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) # The PAPI dependency needs to be exposed to the user. 
set(GKO_RPATH_ADDITIONS "") if (GINKGO_HAVE_PAPI_SDE) @@ -87,6 +82,15 @@ if (GINKGO_HAVE_PAPI_SDE) list(APPEND GKO_RPATH_ADDITIONS "${GKO_PAPI_LIBDIR}") endif() +if (GINKGO_BUILD_MPI) + target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX) + target_include_directories(ginkgo SYSTEM PUBLIC ${MPI_INCLUDE_PATH}) + + # Propagate some useful information + set(MPI_C_VERSION ${MPI_C_VERSION} PARENT_SCOPE) + set(MPI_C_LIBRARIES ${MPI_C_LIBRARIES} PARENT_SCOPE) +endif() + # Since we have a public dependency on HIP, this dependency appears # here as well if (GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") diff --git a/mpi/base/exception.cpp b/core/mpi/exception.cpp similarity index 100% rename from mpi/base/exception.cpp rename to core/mpi/exception.cpp diff --git a/mpi/get_info.cmake b/core/mpi/get_info.cmake similarity index 100% rename from mpi/get_info.cmake rename to core/mpi/get_info.cmake diff --git a/core/test/CMakeLists.txt b/core/test/CMakeLists.txt index fcf1cf64777..2bb711e7a2e 100644 --- a/core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -2,6 +2,9 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(accessor) add_subdirectory(base) +if(GINKGO_BUILD_MPI) + add_subdirectory(mpi) +endif() add_subdirectory(factorization) add_subdirectory(log) add_subdirectory(matrix) diff --git a/mpi/test/CMakeLists.txt b/core/test/mpi/CMakeLists.txt similarity index 100% rename from mpi/test/CMakeLists.txt rename to core/test/mpi/CMakeLists.txt diff --git a/mpi/test/base/CMakeLists.txt b/core/test/mpi/base/CMakeLists.txt similarity index 100% rename from mpi/test/base/CMakeLists.txt rename to core/test/mpi/base/CMakeLists.txt diff --git a/mpi/test/base/bindings.cpp b/core/test/mpi/base/bindings.cpp similarity index 98% rename from mpi/test/base/bindings.cpp rename to core/test/mpi/base/bindings.cpp index 0c3f645a0c2..4160d6eef1a 100644 --- a/mpi/test/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "gtest-mpi-listener.hpp" -#include "gtest-mpi-main.hpp" +#include "../gtest-mpi-listener.hpp" +#include "../gtest-mpi-main.hpp" #include @@ -181,7 +181,7 @@ TEST_F(MpiBindings, CanPutValuesWithLockAll) } } win.unlock_all(); - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); @@ -214,7 +214,7 @@ TEST_F(MpiBindings, CanPutValuesWithExclusiveLock) } } } - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); @@ -248,7 +248,7 @@ TEST_F(MpiBindings, CanPutValuesWithFence) } } win.fence(); - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); @@ -283,7 +283,7 @@ TEST_F(MpiBindings, CanGetValuesWithLockAll) } win.unlock_all(); } - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); @@ -318,7 +318,7 @@ TEST_F(MpiBindings, CanGetValuesWithExclusiveLock) } } } - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); @@ -352,7 +352,7 @@ TEST_F(MpiBindings, CanGetValuesWithFence) } } win.fence(); - gko::mpi::synchronize(); + gko::mpi::synchronize(comm); ASSERT_EQ(data[0], 1); ASSERT_EQ(data[1], 2); ASSERT_EQ(data[2], 3); diff --git a/mpi/test/base/communicator.cpp b/core/test/mpi/base/communicator.cpp similarity index 98% rename from mpi/test/base/communicator.cpp rename to core/test/mpi/base/communicator.cpp index 3b7b14059b3..a443fe1125d 100644 --- a/mpi/test/base/communicator.cpp +++ b/core/test/mpi/base/communicator.cpp @@ -35,8 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "gtest-mpi-listener.hpp" -#include "gtest-mpi-main.hpp" +#include "../gtest-mpi-listener.hpp" +#include "../gtest-mpi-main.hpp" #include diff --git a/mpi/test/base/exception_helpers.cpp b/core/test/mpi/base/exception_helpers.cpp similarity index 96% rename from mpi/test/base/exception_helpers.cpp rename to core/test/mpi/base/exception_helpers.cpp index 79b3dd600ba..40ee9b04842 100644 --- a/mpi/test/base/exception_helpers.cpp +++ b/core/test/mpi/base/exception_helpers.cpp @@ -35,8 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "gtest-mpi-listener.hpp" -#include "gtest-mpi-main.hpp" +#include "../gtest-mpi-listener.hpp" +#include "../gtest-mpi-main.hpp" #include diff --git a/mpi/test/cuda-aware-mpi-test.cu b/core/test/mpi/cuda-aware-mpi-test.cu similarity index 100% rename from mpi/test/cuda-aware-mpi-test.cu rename to core/test/mpi/cuda-aware-mpi-test.cu diff --git a/mpi/test/gtest-mpi-listener.hpp b/core/test/mpi/gtest-mpi-listener.hpp similarity index 100% rename from mpi/test/gtest-mpi-listener.hpp rename to core/test/mpi/gtest-mpi-listener.hpp diff --git a/mpi/test/gtest-mpi-main.hpp b/core/test/mpi/gtest-mpi-main.hpp similarity index 100% rename from mpi/test/gtest-mpi-main.hpp rename to core/test/mpi/gtest-mpi-main.hpp diff --git a/examples/mpi-simple-solver/mpi-simple-solver.cpp b/examples/mpi-simple-solver/mpi-simple-solver.cpp index 60ae981a450..fcda3f44927 100644 --- a/examples/mpi-simple-solver/mpi-simple-solver.cpp +++ b/examples/mpi-simple-solver/mpi-simple-solver.cpp @@ -173,5 +173,4 @@ int main(int argc, char* argv[]) std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); } - gko::mpi::synchronize(); } diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 26a37156086..2457fddbe7c 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -459,9 +459,9 @@ double get_walltime() { return MPI_Wtime(); } * * @param comm the communicator */ -void synchronize(const communicator& comm) +void synchronize(const std::shared_ptr& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm->get())); } diff --git a/include/ginkgo/core/base/version.hpp b/include/ginkgo/core/base/version.hpp index 6e58a2c9323..2d41e366c0d 100644 --- a/include/ginkgo/core/base/version.hpp +++ b/include/ginkgo/core/base/version.hpp @@ -219,17 +219,6 @@ class version_info { */ version dpcpp_version; -#if GKO_HAVE_MPI - - /** - * Contains version information of the MPI module. - * - * This is the version of the static/shared library called "ginkgo_mpi". 
- */ - version mpi_version; - -#endif - private: static constexpr version get_header_version() noexcept { @@ -249,10 +238,6 @@ class version_info { static version get_dpcpp_version() noexcept; -#if GKO_HAVE_MPI - static version get_mpi_version() noexcept; -#endif - version_info() : header_version{get_header_version()}, core_version{get_core_version()}, @@ -260,9 +245,6 @@ class version_info { omp_version{get_omp_version()}, cuda_version{get_cuda_version()}, hip_version{get_hip_version()}, -#if GKO_HAVE_MPI - mpi_version{get_mpi_version()}, -#endif dpcpp_version{get_dpcpp_version()} {} }; diff --git a/mpi/CMakeLists.txt b/mpi/CMakeLists.txt deleted file mode 100644 index 7f63bf60863..00000000000 --- a/mpi/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -find_package(MPI REQUIRED) - -add_library(ginkgo_mpi) -target_sources(ginkgo_mpi - PRIVATE - base/exception.cpp - # base/bindings.cpp - base/version.cpp - ) - -ginkgo_compile_features(ginkgo_mpi) -target_include_directories(ginkgo_mpi - SYSTEM PRIVATE ${MPI_INCLUDE_PATH}) -target_link_libraries(ginkgo_mpi PRIVATE MPI::MPI_CXX) -target_compile_options(ginkgo_mpi PRIVATE "${GINKGO_COMPILER_FLAGS}") - -ginkgo_default_includes(ginkgo_mpi) -ginkgo_install_library(ginkgo_mpi mpi) - -if (GINKGO_CHECK_CIRCULAR_DEPS) - ginkgo_check_headers(ginkgo_mpi) -endif() - -if(GINKGO_BUILD_TESTS) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test) - add_subdirectory(test) -endif() - -# Propagate some useful information -set(MPI_C_VERSION ${MPI_C_VERSION} PARENT_SCOPE) -set(MPI_C_LIBRARIES ${MPI_C_LIBRARIES} PARENT_SCOPE) diff --git a/mpi/base/bindings.cpp b/mpi/base/bindings.cpp deleted file mode 100644 index e4313862838..00000000000 --- a/mpi/base/bindings.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - - -#include -#include - - -#include "mpi/base/bindings.hpp" - - -#include -#include -#include - - -#include "mpi/base/helpers.hpp" - - -namespace gko { -namespace mpi { - - -#define GKO_DECLARE_WINDOW(ValueType) class window - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_WINDOW); - - -#define GKO_DECLARE_SEND(SendType) \ - void send(const SendType* send_buffer, const int send_count, \ - const int destination_rank, const int send_tag, \ - std::shared_ptr req, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SEND); - - -#define GKO_DECLARE_RECV(RecvType) \ - void recv(RecvType* recv_buffer, const int recv_count, \ - const int source_rank, const int recv_tag, \ - std::shared_ptr req, std::shared_ptr status, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_RECV); - - -#define GKO_DECLARE_PUT(PutType) \ - void put(const PutType* origin_buffer, const int origin_count, \ - const int target_rank, const unsigned int target_disp, \ - const int target_count, window& window, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_PUT); - - -#define GKO_DECLARE_GET(GetType) \ - void get(GetType* origin_buffer, const int origin_count, \ - const int target_rank, const unsigned int target_disp, \ - const int target_count, window& window, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_GET); - - -#define GKO_DECLARE_BCAST(BroadcastType) \ - void broadcast(BroadcastType* buffer, int count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_BCAST); - - -#define GKO_DECLARE_REDUCE(ReduceType) \ - void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ - int count, op_type operation, int root_rank, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_REDUCE); - - -#define GKO_DECLARE_ALLREDUCE1(ReduceType) \ - void all_reduce(ReduceType* recv_buffer, int count, op_type operation, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE1); - - -#define GKO_DECLARE_ALLREDUCE2(ReduceType) \ - void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, \ - int count, op_type operation, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALLREDUCE2); - - -#define GKO_DECLARE_GATHER1(SendType, RecvType) \ - void gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER1); - - -#define GKO_DECLARE_GATHER2(SendType, RecvType) \ - void gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int* recv_counts, \ - const int* displacements, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GATHER2); - - -#define GKO_DECLARE_ALLGATHER(SendType, RecvType) \ - void all_gather(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALLGATHER); - - -#define GKO_DECLARE_SCATTER1(SendType, RecvType) \ - void scatter(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, int root_rank, \ - std::shared_ptr comm) - 
-GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER1); - - -#define GKO_DECLARE_SCATTER2(SendType, RecvType) \ - void scatter(const SendType* send_buffer, const int* send_counts, \ - const int* displacements, RecvType* recv_buffer, \ - const int recv_count, int root_rank, \ - std::shared_ptr comm) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCATTER2); - - -#define GKO_DECLARE_ALL_TO_ALL1(RecvType) \ - void all_to_all(RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_ALL_TO_ALL1); - - -#define GKO_DECLARE_ALL_TO_ALL2(SendType, RecvType) \ - void all_to_all(const SendType* send_buffer, const int send_count, \ - RecvType* recv_buffer, const int recv_count, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ALL_TO_ALL2); - - -#define GKO_DECLARE_ALL_TO_ALL_V(SendType, RecvType) \ - void all_to_all(const SendType* send_buffer, const int* send_counts, \ - const int* send_offsets, RecvType* recv_buffer, \ - const int* recv_counts, const int* recv_offsets, \ - const int stride, \ - std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_COMBINED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ALL_TO_ALL_V); - - -#define GKO_DECLARE_SCAN(ScanType) \ - void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, \ - op_type op_enum, std::shared_ptr comm, \ - std::shared_ptr req) - -GKO_INSTANTIATE_FOR_EACH_POD_TYPE(GKO_DECLARE_SCAN); - - -} // namespace mpi -} // namespace gko diff --git a/mpi/base/version.cpp b/mpi/base/version.cpp deleted file mode 100644 index c3ee7ab7c78..00000000000 --- a/mpi/base/version.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -namespace gko { - - -version version_info::get_mpi_version() noexcept -{ - // When compiling the module, the header version is the same as the library - // version. Mismatch between the header and the module versions may happen - // if using shared libraries from different versions of Ginkgo. - return version_info::get_header_version(); -} - - -} // namespace gko From fb956894a79d78e34de3e2ffc574ea0176076249 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Fri, 5 Nov 2021 17:28:31 +0000 Subject: [PATCH 22/59] Format files Co-authored-by: Pratik Nayak --- core/mpi/exception.cpp | 4 +--- core/test/mpi/base/bindings.cpp | 12 ++++-------- core/test/mpi/base/communicator.cpp | 9 +++++---- core/test/mpi/base/exception_helpers.cpp | 7 ++++--- core/test/mpi/cuda-aware-mpi-test.cu | 8 ++++---- core/test/mpi/gtest-mpi-listener.hpp | 6 ++++-- core/test/mpi/gtest-mpi-main.hpp | 1 - include/ginkgo/core/base/mpi.hpp | 7 ++++--- 8 files changed, 26 insertions(+), 28 deletions(-) diff --git a/core/mpi/exception.cpp b/core/mpi/exception.cpp index 358fa3dd0b4..cb8cf97c9ef 100644 --- a/core/mpi/exception.cpp +++ b/core/mpi/exception.cpp @@ -30,10 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - -#include - #include +#include #include diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 4160d6eef1a..8a1f44a2e12 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -30,20 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include -#include - - #include -#include "../gtest-mpi-listener.hpp" -#include "../gtest-mpi-main.hpp" - - #include #include #include @@ -51,6 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/mpi/gtest-mpi-listener.hpp" +#include "core/test/mpi/gtest-mpi-main.hpp" + + class MpiBindings : public ::testing::Test { protected: MpiBindings() : ref(gko::ReferenceExecutor::create()) {} diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp index a443fe1125d..cef673fe6ba 100644 --- a/core/test/mpi/base/communicator.cpp +++ b/core/test/mpi/base/communicator.cpp @@ -30,13 +30,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include -#include -#include "../gtest-mpi-listener.hpp" -#include "../gtest-mpi-main.hpp" +#include #include @@ -44,6 +41,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/mpi/gtest-mpi-listener.hpp" +#include "core/test/mpi/gtest-mpi-main.hpp" + + namespace { diff --git a/core/test/mpi/base/exception_helpers.cpp b/core/test/mpi/base/exception_helpers.cpp index 40ee9b04842..6cdeba2ef19 100644 --- a/core/test/mpi/base/exception_helpers.cpp +++ b/core/test/mpi/base/exception_helpers.cpp @@ -35,14 +35,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "../gtest-mpi-listener.hpp" -#include "../gtest-mpi-main.hpp" - #include #include +#include "core/test/mpi/gtest-mpi-listener.hpp" +#include "core/test/mpi/gtest-mpi-main.hpp" + + namespace { diff --git a/core/test/mpi/cuda-aware-mpi-test.cu b/core/test/mpi/cuda-aware-mpi-test.cu index 51ea1960d93..f19cbafc079 100644 --- a/core/test/mpi/cuda-aware-mpi-test.cu +++ b/core/test/mpi/cuda-aware-mpi-test.cu @@ -30,15 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - -#include -#include - #include +#include #include #include +#include + + int main(int argc, char* argv[]) { int num_cuda_devices = 0; diff --git a/core/test/mpi/gtest-mpi-listener.hpp b/core/test/mpi/gtest-mpi-listener.hpp index d1135c37bbc..04a88ea0a68 100644 --- a/core/test/mpi/gtest-mpi-listener.hpp +++ b/core/test/mpi/gtest-mpi-listener.hpp @@ -79,11 +79,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GTEST_MPI_MINIMAL_LISTENER_H #include +#include #include #include #include -#include "gtest/gtest.h" -#include "mpi.h" + + +#include namespace GTestMPIListener { diff --git a/core/test/mpi/gtest-mpi-main.hpp b/core/test/mpi/gtest-mpi-main.hpp index 56edbdf283b..99d9b2efe5c 100644 --- a/core/test/mpi/gtest-mpi-main.hpp +++ b/core/test/mpi/gtest-mpi-main.hpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #define GKO_DECLARE_GTEST_MPI_MAIN \ int main(int argc, char** argv) \ { \ diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 2457fddbe7c..5390cbcc949 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -450,7 +450,7 @@ class communicator : public EnableSharedCreateMethod { * * @param comm the communicator */ -double get_walltime() { return MPI_Wtime(); } +static double get_walltime() { return MPI_Wtime(); } /** @@ -459,7 +459,7 @@ double get_walltime() { return MPI_Wtime(); } * * @param comm the communicator */ -void synchronize(const std::shared_ptr& comm) +static void synchronize(const std::shared_ptr& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm->get())); } @@ -471,7 +471,8 @@ void synchronize(const std::shared_ptr& comm) * @param req The request to wait on. * @param status The status variable that can be queried. 
 */
-void wait(std::shared_ptr<request> req, std::shared_ptr<status> status = {})
+static void wait(std::shared_ptr<request> req,
+                 std::shared_ptr<status> status = {})
 {
     if (status.get()) {
         GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(req->get(), status->get()));

From fcdcb79ed7d21baf16c3bedda9868dfc46c1e6ba Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Mon, 8 Nov 2021 20:08:57 +0100
Subject: [PATCH 23/59] Remove unnecessary example and code in
 gtest-mpi-listener

---
 core/test/mpi/gtest-mpi-listener.hpp          | 216 +-----------------
 examples/CMakeLists.txt                       |   4 -
 examples/mpi-simple-solver/CMakeLists.txt     |  17 --
 examples/mpi-simple-solver/build.sh           |  16 --
 examples/mpi-simple-solver/data/A.mtx         | 114 ---------
 examples/mpi-simple-solver/data/b.mtx         |  21 --
 examples/mpi-simple-solver/data/x0.mtx        |  21 --
 examples/mpi-simple-solver/doc/builds-on      |   1 -
 examples/mpi-simple-solver/doc/intro.dox      |  19 --
 examples/mpi-simple-solver/doc/kind           |   1 -
 examples/mpi-simple-solver/doc/results.dox    |  35 ---
 examples/mpi-simple-solver/doc/short-intro    |   1 -
 examples/mpi-simple-solver/doc/tooltip        |   1 -
 .../mpi-simple-solver/mpi-simple-solver.cpp   | 176 --------------
 14 files changed, 1 insertion(+), 642 deletions(-)
 delete mode 100644 examples/mpi-simple-solver/CMakeLists.txt
 delete mode 100644 examples/mpi-simple-solver/build.sh
 delete mode 100644 examples/mpi-simple-solver/data/A.mtx
 delete mode 100644 examples/mpi-simple-solver/data/b.mtx
 delete mode 100644 examples/mpi-simple-solver/data/x0.mtx
 delete mode 100644 examples/mpi-simple-solver/doc/builds-on
 delete mode 100644 examples/mpi-simple-solver/doc/intro.dox
 delete mode 100644 examples/mpi-simple-solver/doc/kind
 delete mode 100644 examples/mpi-simple-solver/doc/results.dox
 delete mode 100644 examples/mpi-simple-solver/doc/short-intro
 delete mode 100644 examples/mpi-simple-solver/doc/tooltip
 delete mode 100644 examples/mpi-simple-solver/mpi-simple-solver.cpp

diff --git a/core/test/mpi/gtest-mpi-listener.hpp b/core/test/mpi/gtest-mpi-listener.hpp
index 04a88ea0a68..976d65fdac5 100644
--- a/core/test/mpi/gtest-mpi-listener.hpp
+++ b/core/test/mpi/gtest-mpi-listener.hpp
@@ -38,48 +38,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
  *
 *******************************************************************************/
-//
-/*******************************************************************************
- * An example from Google Test was copied with minor modifications. The
- * license of Google Test is below.
- *
- * Google Test has the following copyright notice, which must be
- * duplicated in its entirety per the terms of its license:
- *
- * Copyright 2005, Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *******************************************************************************/ #ifndef GTEST_MPI_MINIMAL_LISTENER_H #define GTEST_MPI_MINIMAL_LISTENER_H -#include #include +#include #include #include #include @@ -132,184 +96,6 @@ class MPIEnvironment : public ::testing::Environment { }; // class MPIEnvironment -// This class more or less takes the code in Google Test's -// MinimalistPrinter example and wraps certain parts of it in MPI calls, -// gathering all results onto rank zero. -class MPIMinimalistPrinter : public ::testing::EmptyTestEventListener { -public: - MPIMinimalistPrinter() - : ::testing::EmptyTestEventListener(), result_vector() - { - int is_mpi_initialized; - MPI_Initialized(&is_mpi_initialized); - if (!is_mpi_initialized) { - printf("MPI must be initialized before RUN_ALL_TESTS!\n"); - printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); - printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); - assert(0); - } - MPI_Comm_dup(MPI_COMM_WORLD, &comm); - UpdateCommState(); - } - - MPIMinimalistPrinter(MPI_Comm comm_) - : ::testing::EmptyTestEventListener(), result_vector() - { - int is_mpi_initialized; - MPI_Initialized(&is_mpi_initialized); - if (!is_mpi_initialized) { - printf("MPI must be initialized before RUN_ALL_TESTS!\n"); - printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); - printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); - assert(0); - } - - MPI_Comm_dup(comm_, &comm); - UpdateCommState(); - } - - MPIMinimalistPrinter(const MPIMinimalistPrinter& printer) - { - int is_mpi_initialized; - MPI_Initialized(&is_mpi_initialized); - if (!is_mpi_initialized) { - printf("MPI must be initialized before RUN_ALL_TESTS!\n"); - printf("Add '::testing::InitGoogleTest(&argc, argv);\n"); - printf(" MPI_Init(&argc, &argv);' to your 'main' function!\n"); - assert(0); - } - - MPI_Comm_dup(printer.comm, &comm); - UpdateCommState(); - result_vector = printer.result_vector; - } - - // Called before the Environment is torn down. - void OnEnvironmentTearDownStart() - { - int is_mpi_finalized; - ASSERT_EQ(MPI_Finalized(&is_mpi_finalized), MPI_SUCCESS); - if (!is_mpi_finalized) { - MPI_Comm_free(&comm); - } - } - - // Called before a test starts. - virtual void OnTestStart(const ::testing::TestInfo& test_info) - { - // Only need to report test start info on rank 0 - if (rank == 0) { - printf("*** Test %s.%s starting.\n", test_info.test_case_name(), - test_info.name()); - } - } - - // Called after an assertion failure or an explicit SUCCESS() macro. - // In an MPI program, this means that certain ranks may not call this - // function if a test part does not fail on all ranks. Consequently, it - // is difficult to have explicit synchronization points here. 
- virtual void OnTestPartResult( - const ::testing::TestPartResult& test_part_result) - { - result_vector.push_back(test_part_result); - } - - // Called after a test ends. - virtual void OnTestEnd(const ::testing::TestInfo& test_info) - { - int localResultCount = result_vector.size(); - std::vector resultCountOnRank(size, 0); - MPI_Gather(&localResultCount, 1, MPI_INT, &resultCountOnRank[0], 1, - MPI_INT, 0, comm); - - if (rank != 0) { - // Nonzero ranks send constituent parts of each result to rank 0 - for (int i = 0; i < localResultCount; i++) { - const ::testing::TestPartResult test_part_result = - result_vector.at(i); - int resultStatus(test_part_result.failed()); - std::string resultFileName(test_part_result.file_name()); - int resultLineNumber(test_part_result.line_number()); - std::string resultSummary(test_part_result.summary()); - - // Must add one for null termination - int resultFileNameSize(resultFileName.size() + 1); - int resultSummarySize(resultSummary.size() + 1); - - MPI_Send(&resultStatus, 1, MPI_INT, 0, rank, comm); - MPI_Send(&resultFileNameSize, 1, MPI_INT, 0, rank, comm); - MPI_Send(&resultLineNumber, 1, MPI_INT, 0, rank, comm); - MPI_Send(&resultSummarySize, 1, MPI_INT, 0, rank, comm); - MPI_Send(resultFileName.c_str(), resultFileNameSize, MPI_CHAR, - 0, rank, comm); - MPI_Send(resultSummary.c_str(), resultSummarySize, MPI_CHAR, 0, - rank, comm); - } - } else { - // Rank 0 first prints its local result data - for (int i = 0; i < localResultCount; i++) { - const ::testing::TestPartResult test_part_result = - result_vector.at(i); - printf(" %s on rank %d, %s:%d\n%s\n", - test_part_result.failed() ? "*** Failure" : "Success", - rank, test_part_result.file_name(), - test_part_result.line_number(), - test_part_result.summary()); - } - - for (int r = 1; r < size; r++) { - for (int i = 0; i < resultCountOnRank[r]; i++) { - int resultStatus, resultFileNameSize, resultLineNumber; - int resultSummarySize; - MPI_Recv(&resultStatus, 1, MPI_INT, r, r, comm, - MPI_STATUS_IGNORE); - MPI_Recv(&resultFileNameSize, 1, MPI_INT, r, r, comm, - MPI_STATUS_IGNORE); - MPI_Recv(&resultLineNumber, 1, MPI_INT, r, r, comm, - MPI_STATUS_IGNORE); - MPI_Recv(&resultSummarySize, 1, MPI_INT, r, r, comm, - MPI_STATUS_IGNORE); - - std::string resultFileName; - std::string resultSummary; - resultFileName.resize(resultFileNameSize); - resultSummary.resize(resultSummarySize); - MPI_Recv(&resultFileName[0], resultFileNameSize, MPI_CHAR, - r, r, comm, MPI_STATUS_IGNORE); - MPI_Recv(&resultSummary[0], resultSummarySize, MPI_CHAR, r, - r, comm, MPI_STATUS_IGNORE); - - printf(" %s on rank %d, %s:%d\n%s\n", - resultStatus ? 
"*** Failure" : "Success", r, - resultFileName.c_str(), resultLineNumber, - resultSummary.c_str()); - } - } - - printf("*** Test %s.%s ending.\n", test_info.test_case_name(), - test_info.name()); - } - - result_vector.clear(); - } - -private: - MPI_Comm comm; - int rank; - int size; - std::vector<::testing::TestPartResult> result_vector; - - int UpdateCommState() - { - int flag = MPI_Comm_rank(comm, &rank); - if (flag != MPI_SUCCESS) { - return flag; - } - flag = MPI_Comm_size(comm, &size); - return flag; - } - -}; // class MPIMinimalistPrinter // This class more or less takes the code in Google Test's // MinimalistPrinter example and wraps certain parts of it in MPI calls, diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5d7feccf592..151430846e1 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -31,10 +31,6 @@ set(EXAMPLES_LIST schroedinger-splitting simple-solver-logging) -if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST mpi-simple-solver) -endif() - if(GINKGO_BUILD_CUDA AND GINKGO_BUILD_OMP) list(APPEND EXAMPLES_LIST custom-matrix-format) endif() diff --git a/examples/mpi-simple-solver/CMakeLists.txt b/examples/mpi-simple-solver/CMakeLists.txt deleted file mode 100644 index 7f146a1972c..00000000000 --- a/examples/mpi-simple-solver/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -cmake_minimum_required(VERSION 3.9) -project(mpi-simple-solver) - -# We only need to find Ginkgo if we build this example stand-alone -if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.5.0 REQUIRED) - find_package(MPI REQUIRED) -endif() - -add_executable(mpi-simple-solver mpi-simple-solver.cpp) -target_link_libraries(mpi-simple-solver Ginkgo::ginkgo) -target_link_libraries(mpi-simple-solver MPI::MPI_CXX) - -# Copy the data files to the execution directory -configure_file(data/A.mtx data/A.mtx COPYONLY) -configure_file(data/b.mtx data/b.mtx COPYONLY) -configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/mpi-simple-solver/build.sh b/examples/mpi-simple-solver/build.sh deleted file mode 100644 index 8b025d647b5..00000000000 --- a/examples/mpi-simple-solver/build.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# set up script -if [ $# -ne 1 ]; then - echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY" - exit 1 -fi -BUILD_DIR=$1 -THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) - -source ${THIS_DIR}/../build-setup.sh - -# build -${CXX} -std=c++14 -o ${THIS_DIR}/mpi-simple-solver ${THIS_DIR}/mpi-simple-solver.cpp \ - -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \ - -L${THIS_DIR} ${LINK_FLAGS} diff --git a/examples/mpi-simple-solver/data/A.mtx b/examples/mpi-simple-solver/data/A.mtx deleted file mode 100644 index c67437da567..00000000000 --- a/examples/mpi-simple-solver/data/A.mtx +++ /dev/null @@ -1,114 +0,0 @@ -%%MatrixMarket matrix coordinate integer symmetric -%------------------------------------------------------------------------------- -% UF Sparse Matrix Collection, Tim Davis -% http://www.cise.ufl.edu/research/sparse/matrices/JGD_Trefethen/Trefethen_20b -% name: JGD_Trefethen/Trefethen_20b -% [Diagonal matrices with primes, Nick Trefethen, Oxford Univ.] -% id: 2203 -% date: 2008 -% author: N. Trefethen -% ed: J.-G. Dumas -% fields: name title A id date author ed kind notes -% kind: combinatorial problem -%------------------------------------------------------------------------------- -% notes: -% Diagonal matrices with primes, Nick Trefethen, Oxford Univ. 
-% From Jean-Guillaume Dumas' Sparse Integer Matrix Collection, -% http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/simc.html -% -% Problem 7 of the Hundred-dollar, Hundred-digit Challenge Problems, -% SIAM News, vol 35, no. 1. -% -% 7. Let A be the 20,000 x 20,000 matrix whose entries are zero -% everywhere except for the primes 2, 3, 5, 7, . . . , 224737 along the -% main diagonal and the number 1 in all the positions A(i,j) with -% |i-j| = 1,2,4,8, . . . ,16384. What is the (1,1) entry of inv(A)? -% -% http://www.siam.org/news/news.php?id=388 -% -% Filename in JGD collection: Trefethen/trefethen_20__19_minor.sms -%------------------------------------------------------------------------------- -19 19 83 -1 1 3 -2 1 1 -3 1 1 -5 1 1 -9 1 1 -17 1 1 -2 2 5 -3 2 1 -4 2 1 -6 2 1 -10 2 1 -18 2 1 -3 3 7 -4 3 1 -5 3 1 -7 3 1 -11 3 1 -19 3 1 -4 4 11 -5 4 1 -6 4 1 -8 4 1 -12 4 1 -5 5 13 -6 5 1 -7 5 1 -9 5 1 -13 5 1 -6 6 17 -7 6 1 -8 6 1 -10 6 1 -14 6 1 -7 7 19 -8 7 1 -9 7 1 -11 7 1 -15 7 1 -8 8 23 -9 8 1 -10 8 1 -12 8 1 -16 8 1 -9 9 29 -10 9 1 -11 9 1 -13 9 1 -17 9 1 -10 10 31 -11 10 1 -12 10 1 -14 10 1 -18 10 1 -11 11 37 -12 11 1 -13 11 1 -15 11 1 -19 11 1 -12 12 41 -13 12 1 -14 12 1 -16 12 1 -13 13 43 -14 13 1 -15 13 1 -17 13 1 -14 14 47 -15 14 1 -16 14 1 -18 14 1 -15 15 53 -16 15 1 -17 15 1 -19 15 1 -16 16 59 -17 16 1 -18 16 1 -17 17 61 -18 17 1 -19 17 1 -18 18 67 -19 18 1 -19 19 71 diff --git a/examples/mpi-simple-solver/data/b.mtx b/examples/mpi-simple-solver/data/b.mtx deleted file mode 100644 index 05d92ecc6f7..00000000000 --- a/examples/mpi-simple-solver/data/b.mtx +++ /dev/null @@ -1,21 +0,0 @@ -%%MatrixMarket matrix array real general -19 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/examples/mpi-simple-solver/data/x0.mtx b/examples/mpi-simple-solver/data/x0.mtx deleted file mode 100644 index 91d470cdbcd..00000000000 --- a/examples/mpi-simple-solver/data/x0.mtx +++ /dev/null @@ -1,21 +0,0 @@ -%%MatrixMarket matrix array real general -19 1 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/examples/mpi-simple-solver/doc/builds-on b/examples/mpi-simple-solver/doc/builds-on deleted file mode 100644 index 8b137891791..00000000000 --- a/examples/mpi-simple-solver/doc/builds-on +++ /dev/null @@ -1 +0,0 @@ - diff --git a/examples/mpi-simple-solver/doc/intro.dox b/examples/mpi-simple-solver/doc/intro.dox deleted file mode 100644 index 70bc1ce3cc7..00000000000 --- a/examples/mpi-simple-solver/doc/intro.dox +++ /dev/null @@ -1,19 +0,0 @@ - -

-<a name="Intro"></a>
-<h1>Introduction</h1>
-This simple solver example should help you get started with Ginkgo. This example is meant for you to understand
-how Ginkgo works and how you can solve a simple linear system with Ginkgo. We encourage you to play with the code,
-change the parameters and see what is best suited for your purposes.
-
-<h3> About the example </h3>
-Each example has the following sections:
-<ol>
-    <li> Introduction: This gives an overview of the example and mentions
-    any interesting aspects in the example that might help the reader.
-    <li> The commented program: This section is intended for you to
-    understand the details of the example so that you can play with it and understand
-    Ginkgo and its features better.
-    <li> Results: This section shows the results of the code when run. Though the
-    results may not be completely the same, you can expect the behaviour to be similar.
-    <li> The plain program: This is the complete code without any comments to have
-    a complete overview of the code.
-</ol>
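For readers following the removal: the data files deleted above (data/A.mtx, data/b.mtx, data/x0.mtx) are plain MatrixMarket files, and the example loaded them with Ginkgo's read function. A condensed sketch of that loading step, using the mtx and vec aliases defined in the example source removed further below (paths match the deleted files):

    auto A = gko::share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
    auto b = gko::read<vec>(std::ifstream("data/b.mtx"), exec);
    auto x = gko::read<vec>(std::ifstream("data/x0.mtx"), exec);

The share call makes A a shared pointer, since the matrix is handed both to the solver factory and to the residual check.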
diff --git a/examples/mpi-simple-solver/doc/kind b/examples/mpi-simple-solver/doc/kind deleted file mode 100644 index 15a13db4511..00000000000 --- a/examples/mpi-simple-solver/doc/kind +++ /dev/null @@ -1 +0,0 @@ -basic diff --git a/examples/mpi-simple-solver/doc/results.dox b/examples/mpi-simple-solver/doc/results.dox deleted file mode 100644 index 69a4ef8211c..00000000000 --- a/examples/mpi-simple-solver/doc/results.dox +++ /dev/null @@ -1,35 +0,0 @@ -

-<h1>Results</h1>
-The following is the expected result:
-
-@code{.cpp}
-
-Solution (x):
-%%MatrixMarket matrix array real general
-19 1
-0.252218
-0.108645
-0.0662811
-0.0630433
-0.0384088
-0.0396536
-0.0402648
-0.0338935
-0.0193098
-0.0234653
-0.0211499
-0.0196413
-0.0199151
-0.0181674
-0.0162722
-0.0150714
-0.0107016
-0.0121141
-0.0123025
-Residual norm sqrt(r^T r):
-%%MatrixMarket matrix array real general
-1 1
-2.10788e-15
-
-@endcode
-
-<h3> Comments about programming and debugging </h3>
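The residual norm at the end of this expected output is not read from a file; the example recomputes it after the solve, as the source removed further below shows. Condensed, the check looks like this sketch (one, neg_one, res, vec and real_vec are the objects and aliases defined in that source):

    auto one = gko::initialize<vec>({1.0}, exec);
    auto neg_one = gko::initialize<vec>({-1.0}, exec);
    auto res = gko::initialize<real_vec>({0.0}, exec);
    // overwrite b with A*x - b; its 2-norm equals ||b - A*x||
    A->apply(lend(one), lend(x), lend(neg_one), lend(b));
    b->compute_norm2(lend(res));

A value near machine precision, like the 2.10788e-15 above, indicates convergence.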
diff --git a/examples/mpi-simple-solver/doc/short-intro b/examples/mpi-simple-solver/doc/short-intro deleted file mode 100644 index 578df81366b..00000000000 --- a/examples/mpi-simple-solver/doc/short-intro +++ /dev/null @@ -1 +0,0 @@ -The simple solver example. diff --git a/examples/mpi-simple-solver/doc/tooltip b/examples/mpi-simple-solver/doc/tooltip deleted file mode 100644 index 52c4f6d5f44..00000000000 --- a/examples/mpi-simple-solver/doc/tooltip +++ /dev/null @@ -1 +0,0 @@ -Solve a simple linear system with CG. Read a matrix and right hand side from a file. diff --git a/examples/mpi-simple-solver/mpi-simple-solver.cpp b/examples/mpi-simple-solver/mpi-simple-solver.cpp deleted file mode 100644 index fcda3f44927..00000000000 --- a/examples/mpi-simple-solver/mpi-simple-solver.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -// @sect3{Include files} - -// This is the main ginkgo header file. -#include - -// Add the fstream header to read from data from files. -#include -// Add the C++ iostream header to output information to the console. -#include -// Add the STL map header for the executor selection -#include -// Add the string manipulation header to handle strings. -#include - - -int main(int argc, char* argv[]) -{ - auto mpi_init_fin = gko::mpi::init_finalize(argc, argv); - { - // Use some shortcuts. In Ginkgo, vectors are seen as a - // gko::matrix::Dense with one column/one row. The advantage of this - // concept is that using multiple vectors is a now a natural extension - // of adding columns/rows are necessary. - using ValueType = double; - using RealValueType = gko::remove_complex; - using IndexType = int; - using vec = gko::matrix::Dense; - using real_vec = gko::matrix::Dense; - // The gko::matrix::Csr class is used here, but any other matrix class - // such as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Ell or - // gko::matrix::Sellp could also be used. 
- using mtx = gko::matrix::Csr; - // The gko::solver::Cg is used here, but any other solver class can also - // be used. - using cg = gko::solver::Cg; - - // Print the ginkgo version information. - std::cout << gko::version_info::get() << std::endl; - - if (argc == 2 && (std::string(argv[1]) == "--help")) { - std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl; - std::exit(-1); - } - - // @sect3{Where do you want to run your solver ?} - // The gko::Executor class is one of the cornerstones of Ginkgo. - // Currently, we have support for an gko::OmpExecutor, which uses OpenMP - // multi-threading in most of its kernels, a gko::ReferenceExecutor, a - // single threaded specialization of the OpenMP executor and a - // gko::CudaExecutor which runs the code on a NVIDIA GPU if available. - // @note With the help of C++, you see that you only ever need to change - // the executor and all the other functions/ routines within Ginkgo - // should automatically work and run on the executor with any other - // changes. - const auto executor_string = argc >= 2 ? argv[1] : "reference"; - std::map()>> - exec_map{ - {"omp", [] { return gko::OmpExecutor::create(); }}, - {"cuda", - [] { - return gko::CudaExecutor::create( - 0, gko::OmpExecutor::create(), true); - }}, - {"hip", - [] { - return gko::HipExecutor::create( - 0, gko::OmpExecutor::create(), true); - }}, - {"dpcpp", - [] { - return gko::DpcppExecutor::create( - 0, gko::OmpExecutor::create()); - }}, - {"reference", [] { return gko::ReferenceExecutor::create(); }}}; - - // executor where Ginkgo will perform the computation - const auto exec = - exec_map.at(executor_string)(); // throws if not valid - - // @sect3{Reading your data and transfer to the proper device.} - // Read the matrix, right hand side and the initial solution using the - // @ref read function. - // @note Ginkgo uses C++ smart pointers to automatically manage memory. - // To this end, we use our own object ownership transfer functions that - // under the hood call the required smart pointer functions to manage - // object ownership. The gko::share , gko::give and gko::lend are the - // functions that you would need to use. - auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); - auto b = gko::read(std::ifstream("data/b.mtx"), exec); - auto x = gko::read(std::ifstream("data/x0.mtx"), exec); - - // @sect3{Creating the solver} - // Generate the gko::solver factory. Ginkgo uses the concept of - // Factories to build solvers with certain properties. Observe the - // Fluent interface used here. Here a cg solver is generated with a - // stopping criteria of maximum iterations of 20 and a residual norm - // reduction of 1e-7. You also observe that the stopping - // criteria(gko::stop) are also generated from factories using their - // build methods. You need to specify the executors which each of the - // object needs to be built on. - const RealValueType reduction_factor{1e-7}; - auto solver_gen = - cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .on(exec); - // Generate the solver from the matrix. The solver factory built in the - // previous step takes a "matrix"(a gko::LinOp to be more general) as an - // input. 
In this case we provide it with a full matrix that we - // previously read, but as the solver only effectively uses the apply() - // method within the provided "matrix" object, you can effectively - // create a gko::LinOp class with your own apply implementation to - // accomplish more tasks. We will see an example of how this can be done - // in the custom-matrix-format example - auto solver = solver_gen->generate(A); - - // Finally, solve the system. The solver, being a gko::LinOp, can be - // applied to a right hand side, b to obtain the solution, x. - solver->apply(lend(b), lend(x)); - - // Print the solution to the command line. - std::cout << "Solution (x):\n"; - write(std::cout, lend(x)); - - // To measure if your solution has actually converged, you can measure - // the error of the solution. one, neg_one are objects that represent - // the numbers which allow for a uniform interface when computing on any - // device. To compute the residual, all you need to do is call the apply - // method, which in this case is an spmv and equivalent to the LAPACK - // z_spmv routine. Finally, you compute the euclidean 2-norm with the - // compute_norm2 function. - auto one = gko::initialize({1.0}, exec); - auto neg_one = gko::initialize({-1.0}, exec); - auto res = gko::initialize({0.0}, exec); - A->apply(lend(one), lend(x), lend(neg_one), lend(b)); - b->compute_norm2(lend(res)); - - std::cout << "Residual norm sqrt(r^T r):\n"; - write(std::cout, lend(res)); - } -} From 06a4f36b66cbd6561e98948df96cf4091d28a901 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 9 Nov 2021 10:01:54 +0000 Subject: [PATCH 24/59] Format files Co-authored-by: Pratik Nayak --- core/test/mpi/gtest-mpi-listener.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/test/mpi/gtest-mpi-listener.hpp b/core/test/mpi/gtest-mpi-listener.hpp index 976d65fdac5..2afeb8aa38c 100644 --- a/core/test/mpi/gtest-mpi-listener.hpp +++ b/core/test/mpi/gtest-mpi-listener.hpp @@ -42,8 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #ifndef GTEST_MPI_MINIMAL_LISTENER_H
 #define GTEST_MPI_MINIMAL_LISTENER_H
 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 

From 4f5ae1d107b72d2bb36152fdeeceb7d3e3934e09 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Wed, 10 Nov 2021 18:44:38 +0100
Subject: [PATCH 25/59] Library fixes and gtest-mpi-listener updates

Co-authored-by: Tobias Ribizel
---
 ABOUT-LICENSING.md                        | 32 ++++++++++++
 cmake/GinkgoConfig.cmake.in               |  7 ++-
 cmake/create_test.cmake                   |  9 +---
 core/CMakeLists.txt                       |  5 --
 core/mpi/get_info.cmake                   |  2 +-
 core/test/mpi/base/bindings.cpp           |  9 ----
 core/test/mpi/base/communicator.cpp       |  8 ---
 core/test/mpi/base/exception_helpers.cpp  |  9 ----
 core/test/mpi/gtest-mpi-main.hpp          | 51 -------------------
 devices/CMakeLists.txt                    |  3 --
 devices/mpi/CMakeLists.txt                |  2 -
 devices/mpi/dummy.cpp                     | 34 -------------
 third_party/gtest/CMakeLists.txt          | 11 +++-
 .../gtest/gtest_mpi_listener.cpp          | 21 ++++++--
 14 files changed, 66 insertions(+), 137 deletions(-)
 delete mode 100644 core/test/mpi/gtest-mpi-main.hpp
 delete mode 100644 devices/mpi/CMakeLists.txt
 delete mode 100644 devices/mpi/dummy.cpp
 rename core/test/mpi/gtest-mpi-listener.hpp => third_party/gtest/gtest_mpi_listener.cpp (96%)

diff --git a/ABOUT-LICENSING.md b/ABOUT-LICENSING.md
index e9215ee35cf..f7f67b5030f 100644
--- a/ABOUT-LICENSING.md
+++ b/ABOUT-LICENSING.md
@@ -288,3 +288,35 @@ __NOTE:__ Some of the options that pull additional software when compiling
 Ginkgo are ON by default, and have to be disabled manually to prevent
 third-party licensing. Refer to the [Installation section in
 INSTALL.md](INSTALL.md#Building) for more details.
+
+
+When testing with MPI switched on, the header-only gtest-mpi-listener library is used for testing MPI functionality. The repository is triple-licensed under BSD-3, MIT and Apache 2.0; the license is duplicated below. More details on the license and the library are [available on GitHub](https://github.com/LLNL/gtest-mpi-listener).
+
+
+> # Copyright 2005, Google Inc. All rights reserved.
+> #
+> # Redistribution and use in source and binary forms, with or without
+> # modification, are permitted provided that the following conditions are
+> # met:
+> #
+> # * Redistributions of source code must retain the above copyright
+> # notice, this list of conditions and the following disclaimer.
+> # * Redistributions in binary form must reproduce the above
+> # copyright notice, this list of conditions and the following disclaimer
+> # in the documentation and/or other materials provided with the
+> # distribution.
+> # * Neither the name of Google Inc. nor the names of its
+> # contributors may be used to endorse or promote products derived from
+> # this software without specific prior written permission.
+> #
+> # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+> # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+> # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+> # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +> # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +> # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +> # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +> # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +> # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +> # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +> # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 2c129dfaa6a..2f2fdb417ca 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -83,8 +83,6 @@ set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) set(GINKGO_HAVE_MPI @GINKGO_HAVE_MPI@) -set(GINKGO_HAVE_CUDA_AWARE_MPI @GINKGO_HAVE_CUDA_AWARE_MPI@) -set(GINKGO_FORCE_CUDA_AWARE_MPI @GINKGO_FORCE_CUDA_AWARE_MPI@) set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) @@ -156,6 +154,11 @@ if(GINKGO_HAVE_HWLOC) find_package(HWLOC REQUIRED) endif() +# Check for MPI if it is enabled +if(GINKGO_HAVE_MPI) + find_package(MPI REQUIRED) +endif() + # HIP and OpenMP depend on Threads::Threads in some circumstances, but don't find it if (GINKGO_BUILD_HIP OR GINKGO_BUILD_OMP) find_package(Threads REQUIRED) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index c0f2ecd260f..de5fd2bbcda 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -65,11 +65,6 @@ function(ginkgo_create_mpi_test test_name num_mpi_procs) ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - ${MPI_INCLUDE_PATH} - ) set_target_properties(${TEST_TARGET_NAME} PROPERTIES OUTPUT_NAME ${test_name}) if (GINKGO_CHECK_CIRCULAR_DEPS) @@ -80,11 +75,11 @@ function(ginkgo_create_mpi_test test_name num_mpi_procs) else() set(OPENMPI_RUN_AS_ROOT_FLAG "") endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::MPI_main GTest::GTest ${ARGN}) target_link_libraries(${TEST_TARGET_NAME} PRIVATE MPI::MPI_CXX) set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND ${MPIEXEC_EXECUTABLE} ${test_param} ) + COMMAND ${MPIEXEC_EXECUTABLE} ${test_param}) endfunction(ginkgo_create_mpi_test) function(ginkgo_create_test_cpp_cuda_header test_name) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index a0e62f31ccd..68b116315a4 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -84,11 +84,6 @@ endif() if (GINKGO_BUILD_MPI) target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX) - target_include_directories(ginkgo SYSTEM PUBLIC ${MPI_INCLUDE_PATH}) - - # Propagate some useful information - set(MPI_C_VERSION ${MPI_C_VERSION} PARENT_SCOPE) - set(MPI_C_LIBRARIES ${MPI_C_LIBRARIES} PARENT_SCOPE) endif() # Since we have a public dependency on HIP, this dependency appears diff --git a/core/mpi/get_info.cmake b/core/mpi/get_info.cmake index ebcea60e4ae..2a735a46504 100644 --- a/core/mpi/get_info.cmake +++ b/core/mpi/get_info.cmake @@ -7,7 +7,7 @@ ginkgo_print_variable(${detailed_log} "MPI_CXX_COMPILE_OPTIONS") ginkgo_print_variable(${detailed_log} 
"MPI_CXX_LINK_FLAGS") ginkgo_print_variable(${detailed_log} "MPI_CXX_LIB_NAMES") ginkgo_print_variable(${detailed_log} "MPI_CXX_HEADER_DIR") -ginkgo_print_variable(${detailed_log} "MPI_mpi_LIBRARY") +ginkgo_print_variable(${detailed_log} "MPI_CXX_LIBRARY") ginkgo_print_variable(${detailed_log} "MPIEXEC_EXECUTABLE") ginkgo_print_variable(${detailed_log} "MPIEXEC_MAX_NUMPROCS") ginkgo_print_variable(${detailed_log} "MPIEXEC_NUMPROC_FLAG") diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 8a1f44a2e12..eae7ef49861 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -43,10 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/test/mpi/gtest-mpi-listener.hpp" -#include "core/test/mpi/gtest-mpi-main.hpp" - - class MpiBindings : public ::testing::Test { protected: MpiBindings() : ref(gko::ReferenceExecutor::create()) {} @@ -787,8 +783,3 @@ TEST_F(MpiBindings, CanScanValues) EXPECT_EQ(min, 2.0); } } - - -// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for -// more details. -GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp index cef673fe6ba..b6ce9cabb8a 100644 --- a/core/test/mpi/base/communicator.cpp +++ b/core/test/mpi/base/communicator.cpp @@ -41,10 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/test/mpi/gtest-mpi-listener.hpp" -#include "core/test/mpi/gtest-mpi-main.hpp" - - namespace { @@ -185,7 +181,3 @@ TEST_F(Communicator, CanSetCustomCommunicator) } // namespace - -// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for -// more details. -GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/core/test/mpi/base/exception_helpers.cpp b/core/test/mpi/base/exception_helpers.cpp index 6cdeba2ef19..7b182dbb887 100644 --- a/core/test/mpi/base/exception_helpers.cpp +++ b/core/test/mpi/base/exception_helpers.cpp @@ -40,10 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/test/mpi/gtest-mpi-listener.hpp" -#include "core/test/mpi/gtest-mpi-main.hpp" - - namespace { @@ -60,8 +56,3 @@ TEST(AssertNoMpiErrors, DoesNotThrowOnSuccess) } // namespace - - -// Calls a custom gtest main with MPI listeners. See gtest-mpi-listeners.hpp for -// more details. -GKO_DECLARE_GTEST_MPI_MAIN; diff --git a/core/test/mpi/gtest-mpi-main.hpp b/core/test/mpi/gtest-mpi-main.hpp deleted file mode 100644 index 99d9b2efe5c..00000000000 --- a/core/test/mpi/gtest-mpi-main.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#define GKO_DECLARE_GTEST_MPI_MAIN \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - MPI_Init(&argc, &argv); \ - ::testing::AddGlobalTestEnvironment( \ - new GTestMPIListener::MPIEnvironment); \ - ::testing::TestEventListeners& listeners = \ - ::testing::UnitTest::GetInstance()->listeners(); \ - ::testing::TestEventListener* l = \ - listeners.Release(listeners.default_result_printer()); \ - listeners.Append( \ - new GTestMPIListener::MPIWrapperPrinter(l, MPI_COMM_WORLD)); \ - int result = RUN_ALL_TESTS(); \ - return 0; \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") diff --git a/devices/CMakeLists.txt b/devices/CMakeLists.txt index f87e5fc9144..09797aafe49 100644 --- a/devices/CMakeLists.txt +++ b/devices/CMakeLists.txt @@ -23,7 +23,4 @@ add_subdirectory(cuda) add_subdirectory(dpcpp) add_subdirectory(hip) add_subdirectory(omp) -if(GINKGO_BUILD_MPI) - add_subdirectory(mpi) -endif() add_subdirectory(reference) diff --git a/devices/mpi/CMakeLists.txt b/devices/mpi/CMakeLists.txt deleted file mode 100644 index e86b0e0325c..00000000000 --- a/devices/mpi/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -ginkgo_add_object_library(ginkgo_mpi_device - dummy.cpp) diff --git a/devices/mpi/dummy.cpp b/devices/mpi/dummy.cpp deleted file mode 100644 index 14e18b6d1f9..00000000000 --- a/devices/mpi/dummy.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -// Remove this file once there is at least one source file in -// ginkgo_mpi_device diff --git a/third_party/gtest/CMakeLists.txt b/third_party/gtest/CMakeLists.txt index 25f340ce122..52b49207596 100644 --- a/third_party/gtest/CMakeLists.txt +++ b/third_party/gtest/CMakeLists.txt @@ -22,4 +22,13 @@ set_target_properties(gtest gtest_main PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${GINKGO_LIBRARY_PATH}") # by default, the outdated targets are not being exported add_library(GTest::Main ALIAS gtest_main) -add_library(GTest::GTest ALIAS gtest) \ No newline at end of file +add_library(GTest::GTest ALIAS gtest) +if(GINKGO_BUILD_MPI) + add_library(gtest_mpi_main "") + target_sources(gtest_mpi_main + PRIVATE + gtest_mpi_listener.cpp) + find_package(MPI REQUIRED) + target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) + add_library(GTest::MPI_main ALIAS gtest_mpi_main) +endif() diff --git a/core/test/mpi/gtest-mpi-listener.hpp b/third_party/gtest/gtest_mpi_listener.cpp similarity index 96% rename from core/test/mpi/gtest-mpi-listener.hpp rename to third_party/gtest/gtest_mpi_listener.cpp index 2afeb8aa38c..d2639c7dfda 100644 --- a/core/test/mpi/gtest-mpi-listener.hpp +++ b/third_party/gtest/gtest_mpi_listener.cpp @@ -39,11 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* *******************************************************************************/ -#ifndef GTEST_MPI_MINIMAL_LISTENER_H -#define GTEST_MPI_MINIMAL_LISTENER_H - -#include #include +#include #include #include #include @@ -373,4 +370,18 @@ class MPIWrapperPrinter : public ::testing::TestEventListener { } // namespace GTestMPIListener -#endif /* GTEST_MPI_MINIMAL_LISTENER_H */ + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); + ::testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); + ::testing::TestEventListeners& listeners = + ::testing::UnitTest::GetInstance()->listeners(); + ::testing::TestEventListener* l = + listeners.Release(listeners.default_result_printer()); + listeners.Append( + new GTestMPIListener::MPIWrapperPrinter(l, MPI_COMM_WORLD)); + int result = RUN_ALL_TESTS(); + return 0; +} From c0a82cbefce6c8aedfc118378ba322ea57c4d85f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 10 Nov 2021 18:53:55 +0100 Subject: [PATCH 26/59] Some more fixes for cmake and MPI types --- cmake/GinkgoConfig.cmake.in | 2 +- core/test/mpi/base/bindings.cpp | 23 +- include/ginkgo/core/base/mpi.hpp | 432 +++++++++++++++---------------- 3 files changed, 218 insertions(+), 239 deletions(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 2f2fdb417ca..ec79a229ad4 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -83,11 +83,11 @@ set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) set(GINKGO_HAVE_MPI @GINKGO_HAVE_MPI@) +set(GINKGO_HAVE_CUDA_AWARE_MPI @GINKGO_HAVE_CUDA_AWARE_MPI@) set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) set(GINKGO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@) -set(GINKGO_BUILD_HWLOC @GINKGO_BUILD_HWLOC@) # Ginkgo installation configuration set(GINKGO_CONFIG_FILE_PATH "${CMAKE_CURRENT_LIST_DIR}") diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index eae7ef49861..ccbd419e5c3 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -399,12 +399,9 @@ TEST_F(MpiBindings, CanReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::reduce(&data, &sum, 1, gko::mpi::op_type::sum, 0, - comm); - gko::mpi::reduce(&data, &max, 1, gko::mpi::op_type::max, 0, - comm); - gko::mpi::reduce(&data, &min, 1, gko::mpi::op_type::min, 0, - comm); + gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); + gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); + gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); if (my_rank == 0) { EXPECT_EQ(sum, 16.0); EXPECT_EQ(max, 6.0); @@ -428,7 +425,7 @@ TEST_F(MpiBindings, CanAllReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, &sum, 1, gko::mpi::op_type::sum, comm); + gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); ASSERT_EQ(sum, 16); } @@ -448,7 +445,7 @@ TEST_F(MpiBindings, CanAllReduceValuesInPlace) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, 1, gko::mpi::op_type::sum, comm); + gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); ASSERT_EQ(data, 16); } @@ -549,7 +546,7 @@ TEST_F(MpiBindings, CanScatterValuesWithDisplacements) scatter_into_array = gko::Array{ref, static_cast(nelems)}; gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); - gko::mpi::scatterv( + gko::mpi::scatter_v( scatter_from_array.get_data(), s_counts.get_data(), displacements.get_data(), scatter_into_array.get_data(), nelems, 0, comm); @@ -614,7 +611,7 @@ TEST_F(MpiBindings, CanGatherValuesWithDisplacements) } 
     gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm);
-    gko::mpi::gatherv(
+    gko::mpi::gather_v(
         gather_from_array.get_data(), nelems, gather_into_array.get_data(),
         r_counts.get_data(), displacements.get_data(), 0, comm);
     auto comp_data = gather_into_array.get_data();
@@ -762,9 +759,9 @@ TEST_F(MpiBindings, CanScanValues)
     } else if (my_rank == 3) {
        data = 6;
     }
-    gko::mpi::scan(&data, &sum, 1, gko::mpi::op_type::sum, comm);
-    gko::mpi::scan(&data, &max, 1, gko::mpi::op_type::max, comm);
-    gko::mpi::scan(&data, &min, 1, gko::mpi::op_type::min, comm);
+    gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm);
+    gko::mpi::scan(&data, &max, 1, MPI_MAX, comm);
+    gko::mpi::scan(&data, &min, 1, MPI_MIN, comm);
     if (my_rank == 0) {
         EXPECT_EQ(sum, 3.0);
         EXPECT_EQ(max, 3.0);
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index 5390cbcc949..77cb3e3e9b9 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -60,27 +60,6 @@ namespace gko {
 namespace mpi {
 
 
-/*
- * This enum is used for selecting the operation type for functions that take
- * MPI_Op. For example the MPI_Reduce operations.
- */
-enum class op_type {
-    sum = 1,
-    min = 2,
-    max = 3,
-    product = 4,
-    custom = 5,
-    logical_and = 6,
-    bitwise_and = 7,
-    logical_or = 8,
-    bitwise_or = 9,
-    logical_xor = 10,
-    bitwise_xor = 11,
-    max_val_and_loc = 12,
-    min_val_and_loc = 13
-};
-
-
 /*
  * This enum specifies the threading type to be used when creating an MPI
  * environment.
@@ -95,59 +74,95 @@ enum class thread_type {
 namespace detail {
 
 
-#define GKO_MPI_DATATYPE(BaseType, MPIType)                                   \
-    inline MPI_Datatype get_mpi_type(const BaseType&) { return MPIType; }     \
-    static_assert(true,                                                       \
-                  "This assert is used to counter the false positive extra " \
-                  "semi-colon warnings")
+template <typename T>
+constexpr MPI_Datatype mpi_type_impl()
+{
+    return MPI_C_BOOL;
+}
 
-GKO_MPI_DATATYPE(bool, MPI_C_BOOL);
-GKO_MPI_DATATYPE(char, MPI_CHAR);
-GKO_MPI_DATATYPE(unsigned char, MPI_UNSIGNED_CHAR);
-GKO_MPI_DATATYPE(unsigned, MPI_UNSIGNED);
-GKO_MPI_DATATYPE(int, MPI_INT);
-GKO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG);
-GKO_MPI_DATATYPE(unsigned short, MPI_UNSIGNED_SHORT);
-GKO_MPI_DATATYPE(long, MPI_LONG);
-GKO_MPI_DATATYPE(float, MPI_FLOAT);
-GKO_MPI_DATATYPE(double, MPI_DOUBLE);
-GKO_MPI_DATATYPE(long double, MPI_LONG_DOUBLE);
-GKO_MPI_DATATYPE(std::complex<float>, MPI_C_COMPLEX);
-GKO_MPI_DATATYPE(std::complex<double>, MPI_C_DOUBLE_COMPLEX);
+template <>
+constexpr MPI_Datatype mpi_type_impl<char>()
+{
+    return MPI_CHAR;
+}
 
-template <typename T>
-MPI_Op get_operation(gko::mpi::op_type op)
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<unsigned char>()
 {
-    switch (op) {
-    case gko::mpi::op_type::sum:
-        return MPI_SUM;
-    case gko::mpi::op_type::min:
-        return MPI_MIN;
-    case gko::mpi::op_type::max:
-        return MPI_MAX;
-    case gko::mpi::op_type::product:
-        return MPI_PROD;
-    case gko::mpi::op_type::logical_and:
-        return MPI_LAND;
-    case gko::mpi::op_type::bitwise_and:
-        return MPI_BAND;
-    case gko::mpi::op_type::logical_or:
-        return MPI_LOR;
-    case gko::mpi::op_type::bitwise_or:
-        return MPI_BOR;
-    case gko::mpi::op_type::logical_xor:
-        return MPI_LXOR;
-    case gko::mpi::op_type::bitwise_xor:
-        return MPI_BXOR;
-    case gko::mpi::op_type::max_val_and_loc:
-        return MPI_MAXLOC;
-    case gko::mpi::op_type::min_val_and_loc:
-        return MPI_MINLOC;
-    default:
-        GKO_NOT_SUPPORTED(op);
-    }
+    return MPI_UNSIGNED_CHAR;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<unsigned>()
+{
+    return MPI_UNSIGNED;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<int>()
+{
+    return MPI_INT;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<unsigned short>()
+{
+    return MPI_UNSIGNED_SHORT;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<unsigned long>()
+{
+    return MPI_UNSIGNED_LONG;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<long>()
+{
+    return MPI_LONG;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<float>()
+{
+    return MPI_FLOAT;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<double>()
+{
+    return MPI_DOUBLE;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<long double>()
+{
+    return MPI_LONG_DOUBLE;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<std::complex<float>>()
+{
+    return MPI_C_COMPLEX;
+}
+
+
+template <>
+constexpr MPI_Datatype mpi_type_impl<std::complex<double>>()
+{
+    return MPI_C_DOUBLE_COMPLEX;
 }
 
 
@@ -161,6 +176,13 @@ inline const T* in_place()
 }  // namespace detail
 
 
+template <typename T>
+constexpr MPI_Datatype get_type()
+{
+    return detail::mpi_type_impl<T>();
+}
+
+
 /*
  * Class that sets up and finalizes the MPI exactly once per program execution.
  * using the singleton pattern. This must be called before any of the MPI
@@ -450,7 +472,7 @@ class communicator : public EnableSharedCreateMethod<communicator> {
  *
 * @param comm the communicator
 */
-static double get_walltime() { return MPI_Wtime(); }
+inline double get_walltime() { return MPI_Wtime(); }
 
 
 /**
@@ -459,7 +481,7 @@ static double get_walltime() { return MPI_Wtime(); }
 *
 * @param comm the communicator
 */
-static void synchronize(const std::shared_ptr<communicator>& comm)
+inline void synchronize(const std::shared_ptr<communicator>& comm)
 {
     GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm->get()));
 }
 
 
@@ -471,14 +493,11 @@ static void synchronize(const std::shared_ptr<communicator>& comm)
 /**
 * Allows a rank to wait on a particular request handle.
 *
 * @param req The request to wait on.
 * @param status The status variable that can be queried.
 */
-static void wait(std::shared_ptr<request> req,
+inline void wait(std::shared_ptr<request> req,
                  std::shared_ptr<status> status = {})
 {
-    if (status.get()) {
-        GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(req->get(), status->get()));
-    } else {
-        GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(req->get(), MPI_STATUS_IGNORE));
-    }
+    GKO_ASSERT_NO_MPI_ERRORS(
+        MPI_Wait(req->get(), status ? status->get() : MPI_STATUS_IGNORE));
 }
 
 
@@ -497,10 +516,15 @@ class window {
     enum class lock_type { shared = 1, exclusive = 2 };
 
     window() : window_(MPI_WIN_NULL) {}
-    window(window& other) = default;
-    window& operator=(const window& other) = default;
-    window(window&& other) = default;
-    window& operator=(window&& other) = default;
+    window(const window& other) = delete;
+    window& operator=(const window& other) = delete;
+    window(window&& other)
+        : window_{std::exchange(other.window_, MPI_WIN_NULL)}
+    {}
+    window& operator=(window&& other)
+    {
+        window_ = std::exchange(other.window_, MPI_WIN_NULL);
+        return *this;
+    }
 
     window(ValueType* base, unsigned int size,
            std::shared_ptr<communicator> comm,
@@ -606,9 +629,9 @@ void send(const SendType* send_buffer, const int send_count,
           const int destination_rank, const int send_tag,
           std::shared_ptr<communicator> comm)
 {
-    auto send_type = detail::get_mpi_type(send_buffer[0]);
-    GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, send_type,
-                                      destination_rank, send_tag, comm->get()));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count,
+                                      get_type<SendType>(), destination_rank,
+                                      send_tag, comm->get()));
 }
 
 
@@ -628,11 +651,9 @@ void send(const SendType* send_buffer, const int send_count,
           std::shared_ptr<request> req,
           std::shared_ptr<communicator> comm)
 {
-    auto send_type = detail::get_mpi_type(send_buffer[0]);
-
-    GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, send_type,
-                                       destination_rank, send_tag, comm->get(),
-                                       req->get()));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count,
+                                       get_type<SendType>(), destination_rank,
+                                       send_tag, comm->get(), req->get()));
 }
 
 
@@ -650,10 +671,9 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
           const int recv_tag, std::shared_ptr<communicator> comm,
           std::shared_ptr<status> status = {})
 {
-    auto recv_type = detail::get_mpi_type(recv_buffer[0]);
-    GKO_ASSERT_NO_MPI_ERRORS(
-        MPI_Recv(recv_buffer, recv_count, recv_type, source_rank, recv_tag,
-                 comm->get(), status ? status->get() : MPI_STATUS_IGNORE));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv(
+        recv_buffer, recv_count, get_type<RecvType>(), source_rank, recv_tag,
+        comm->get(), status ?
status->get() : MPI_STATUS_IGNORE)); } @@ -672,10 +692,9 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, const int recv_tag, std::shared_ptr req, std::shared_ptr comm) { - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, recv_type, - source_rank, recv_tag, comm->get(), - req->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, + get_type(), source_rank, + recv_tag, comm->get(), req->get())); } @@ -694,10 +713,9 @@ void put(const PutType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - auto put_type = detail::get_mpi_type(origin_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Put(origin_buffer, origin_count, put_type, - target_rank, target_disp, target_count, - put_type, window.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Put(origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window.get())); } @@ -718,10 +736,10 @@ void put(const PutType* origin_buffer, const int origin_count, const int target_count, window& window, std::shared_ptr req) { - auto put_type = detail::get_mpi_type(origin_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(origin_buffer, origin_count, put_type, - target_rank, target_disp, target_count, - put_type, window.get(), req->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Rput(origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window.get(), + req->get())); } @@ -740,10 +758,9 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - auto get_type = detail::get_mpi_type(origin_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Get(origin_buffer, origin_count, get_type, - target_rank, target_disp, target_count, - get_type, window.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Get(origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window.get())); } @@ -763,10 +780,9 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window, std::shared_ptr req) { - auto get_type = detail::get_mpi_type(origin_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(origin_buffer, origin_count, get_type, - target_rank, target_disp, target_count, - get_type, window, req->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( + origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window, req->get())); } @@ -782,9 +798,8 @@ template void broadcast(BroadcastType* buffer, int count, int root_rank, std::shared_ptr comm) { - auto bcast_type = detail::get_mpi_type(buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Bcast(buffer, count, bcast_type, root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, get_type(), + root_rank, comm->get())); } @@ -794,19 +809,17 @@ void broadcast(BroadcastType* buffer, int count, int root_rank, * @param send_buffer the buffer to reduce * @param recv_buffer the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. See @op_type + * @param operation the MPI_Op type reduce operation. 
* @param comm the communicator */ template void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - op_type op_enum, int root_rank, + MPI_Op operation, int root_rank, std::shared_ptr comm) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(send_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, - reduce_type, operation, root_rank, - comm->get())); + get_type(), operation, + root_rank, comm->get())); } @@ -816,21 +829,19 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, * @param send_buffer the buffer to reduce * @param recv_buffer the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. See @op_type + * @param operation the MPI_Op type reduce operation. * @param comm the communicator * @param req the request handle */ template void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - op_type op_enum, int root_rank, + MPI_Op operation, int root_rank, std::shared_ptr comm, std::shared_ptr req) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(send_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count, - reduce_type, operation, root_rank, - comm->get(), req->get())); + get_type(), operation, + root_rank, comm->get(), req->get())); } @@ -840,18 +851,16 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, * * @param recv_buffer the data to reduce and the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. See @op_type + * @param operation the MPI_Op type reduce operation. * @param comm the communicator */ template -void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, +void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, std::shared_ptr comm) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(detail::in_place(), - recv_buffer, count, reduce_type, - operation, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Allreduce(detail::in_place(), recv_buffer, count, + get_type(), operation, comm->get())); } @@ -861,20 +870,18 @@ void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, * * @param recv_buffer the data to reduce and the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. See @op_type + * @param operation the reduce operation. See @MPI_Op * @param comm the communicator * @param req the request handle */ template -void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, +void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, std::shared_ptr comm, std::shared_ptr req) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Iallreduce(detail::in_place(), recv_buffer, count, - reduce_type, operation, comm->get(), req->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( + detail::in_place(), recv_buffer, count, + get_type(), operation, comm->get(), req->get())); } @@ -885,19 +892,18 @@ void all_reduce(ReduceType* recv_buffer, int count, op_type op_enum, * @param send_buffer the data to reduce * @param recv_buffer the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. 
See @op_type + * @param operation the reduce operation. See @MPI_Op * @param comm the communicator * @param req the request handle */ template void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, op_type op_enum, + int count, MPI_Op operation, std::shared_ptr comm) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( - send_buffer, recv_buffer, count, reduce_type, operation, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, + get_type(), operation, + comm->get())); } @@ -908,21 +914,19 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, * @param send_buffer the data to reduce * @param recv_buffer the reduced result * @param count the number of elements to reduce - * @param op_enum the reduce operation. See @op_type + * @param operation the reduce operation. See @MPI_Op * @param comm the communicator * @param req the request handle */ template void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, op_type op_enum, + int count, MPI_Op operation, std::shared_ptr comm, std::shared_ptr req) { - auto operation = detail::get_operation(op_enum); - auto reduce_type = detail::get_mpi_type(recv_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, - reduce_type, operation, comm->get(), - req->get())); + get_type(), operation, + comm->get(), req->get())); } @@ -941,11 +945,9 @@ void gather(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Gather(send_buffer, send_count, get_type(), recv_buffer, + recv_count, get_type(), root_rank, comm->get())); } @@ -962,16 +964,14 @@ void gather(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void gatherv(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank, - std::shared_ptr comm) +void gather_v(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int* recv_counts, + const int* displacements, int root_rank, + std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( - send_buffer, send_count, send_type, recv_buffer, recv_counts, - displacements, recv_type, root_rank, comm->get())); + send_buffer, send_count, get_type(), recv_buffer, recv_counts, + displacements, get_type(), root_rank, comm->get())); } @@ -989,11 +989,9 @@ void all_gather(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( + send_buffer, send_count, get_type(), recv_buffer, recv_count, + get_type(), comm->get())); } @@ -1011,11 +1009,9 @@ void 
scatter(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Scatter(send_buffer, send_count, get_type(), recv_buffer, + recv_count, get_type(), root_rank, comm->get())); } @@ -1030,16 +1026,14 @@ void scatter(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void scatterv(const SendType* send_buffer, const int* send_counts, - const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank, - std::shared_ptr comm) +void scatter_v(const SendType* send_buffer, const int* send_counts, + const int* displacements, RecvType* recv_buffer, + const int recv_count, int root_rank, + std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( - send_buffer, send_counts, displacements, send_type, recv_buffer, - recv_count, recv_type, root_rank, comm->get())); + send_buffer, send_counts, displacements, get_type(), + recv_buffer, recv_count, get_type(), root_rank, comm->get())); } @@ -1058,10 +1052,9 @@ template void all_to_all(RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(detail::in_place(), - recv_count, recv_type, recv_buffer, - recv_count, recv_type, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( + detail::in_place(), recv_count, get_type(), + recv_buffer, recv_count, get_type(), comm->get())); } @@ -1082,10 +1075,10 @@ void all_to_all(RecvType* recv_buffer, const int recv_count, std::shared_ptr comm, std::shared_ptr req) { - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - detail::in_place(), recv_count, recv_type, recv_buffer, - recv_count, recv_type, comm->get(), req->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Ialltoall(detail::in_place(), recv_count, + get_type(), recv_buffer, recv_count, + get_type(), comm->get(), req->get())); } @@ -1104,11 +1097,9 @@ void all_to_all(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Alltoall(send_buffer, send_count, get_type(), recv_buffer, + recv_count, get_type(), comm->get())); } @@ -1129,11 +1120,9 @@ void all_to_all(const SendType* send_buffer, const int send_count, std::shared_ptr comm, std::shared_ptr req) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(send_buffer, send_count, send_type, - recv_buffer, recv_count, recv_type, - comm->get(), req->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( + send_buffer, send_count, get_type(), recv_buffer, recv_count, + get_type(), comm->get(), req->get())); } @@ -1156,12 +1145,10 @@ void all_to_all_v(const SendType* send_buffer, const int* 
send_counts, const int* recv_counts, const int* recv_offsets, const int stride, std::shared_ptr comm) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( - send_buffer, send_counts, send_offsets, send_type, recv_buffer, - recv_counts, recv_offsets, recv_type, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Alltoallv(send_buffer, send_counts, send_offsets, + get_type(), recv_buffer, recv_counts, + recv_offsets, get_type(), comm->get())); } @@ -1186,12 +1173,10 @@ void all_to_all_v(const SendType* send_buffer, const int* send_counts, const int stride, std::shared_ptr comm, std::shared_ptr req) { - auto send_type = detail::get_mpi_type(send_buffer[0]); - auto recv_type = detail::get_mpi_type(recv_buffer[0]); - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( - send_buffer, send_counts, send_offsets, send_type, recv_buffer, - recv_counts, recv_offsets, recv_type, comm->get(), req->get())); + send_buffer, send_counts, send_offsets, get_type(), + recv_buffer, recv_counts, recv_offsets, get_type(), + comm->get(), req->get())); } @@ -1202,18 +1187,17 @@ void all_to_all_v(const SendType* send_buffer, const int* send_counts, * @param send_buffer the buffer to scan from * @param recv_buffer the result buffer * @param recv_count the number of elements to scan - * @param op_enum the operation type to be used for the scan. See @op_type + * @param operation the operation type to be used for the scan. See @MPI_Op * @param comm the communicator * @param req the request handle */ template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm) + MPI_Op operation, std::shared_ptr comm) { - auto operation = detail::get_operation(op_enum); - auto scan_type = detail::get_mpi_type(recv_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, - scan_type, operation, comm->get())); + get_type(), operation, + comm->get())); } @@ -1224,20 +1208,18 @@ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, * @param send_buffer the buffer to scan from * @param recv_buffer the result buffer * @param recv_count the number of elements to scan - * @param op_enum the operation type to be used for the scan. See @op_type + * @param operation the operation type to be used for the scan. See @MPI_Op * @param comm the communicator * @param req the request handle */ template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - op_type op_enum, std::shared_ptr comm, + MPI_Op operation, std::shared_ptr comm, std::shared_ptr req) { - auto operation = detail::get_operation(op_enum); - auto scan_type = detail::get_mpi_type(recv_buffer[0]); GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, - scan_type, operation, comm->get(), - req->get())); + get_type(), operation, + comm->get(), req->get())); } From 1b148ec294281d4189787e3eb639ae476ca6c99a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 12 Nov 2021 10:28:06 +0100 Subject: [PATCH 27/59] Remove init finalize exception handling. 
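Before the diff: a short sketch of what this change implies for callers, assuming the init_finalize constructor kept in the diff below (argc/argv plus an optional thread_type). Since the guard no longer checks MPI_Initialized/MPI_Finalized, it must be constructed exactly once per program.

```cpp
// Sketch, assuming the init_finalize API shown in the diff below.
#include <ginkgo/core/base/mpi.hpp>

int main(int argc, char** argv)
{
    // Calls MPI_Init_thread unconditionally; constructing a second
    // guard (or calling MPI_Init elsewhere) is now an MPI error.
    gko::mpi::init_finalize mpi_guard(argc, argv);

    // ... set up communicators and run MPI-enabled code ...

    return 0;
}  // ~init_finalize() calls MPI_Finalize unconditionally
```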
--- include/ginkgo/core/base/exception.hpp | 36 ------------------- .../ginkgo/core/base/exception_helpers.hpp | 24 ------------- include/ginkgo/core/base/mpi.hpp | 23 ++++-------- 3 files changed, 7 insertions(+), 76 deletions(-) diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 8ff5f9e5e16..cd4b53735a4 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -195,42 +195,6 @@ class MpiError : public Error { }; -/** - * MpiError is thrown when a MPI has already been initialized. - * MPI_Init can only be called once in a program. - */ -class MpiInitialized : public Error { -public: - /** - * Initializes a MPI error. - * @param file The name of the offending source file - * @param line The source code line number where the error occurred - * @param func The name of the MPI routine that failed - */ - MpiInitialized(const std::string& file, int line, const std::string& func) - : Error(file, line, func) - {} -}; - - -/** - * MpiError is thrown when a MPI has already been finalized. - * Any MPI routines that are called after calling MPI_Finalize throw this error. - */ -class MpiFinalized : public Error { -public: - /** - * Initializes a MPI error. - * @param file The name of the offending source file - * @param line The source code line number where the error occurred - * @param func The name of the MPI routine that failed - */ - MpiFinalized(const std::string& file, int line, const std::string& func) - : Error(file, line, func) - {} -}; - - /** * CudaError is thrown when a CUDA routine throws a non-zero error code. */ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 82439d56a75..6b726e09344 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -306,30 +306,6 @@ inline dim<2> get_size(const dim<2>& size) { return size; } #define GKO_MPI_ERROR(_errcode) \ ::gko::MpiError(__FILE__, __LINE__, __func__, _errcode) -/** - * Throws when MPI has already been initialized. - * - */ -#define GKO_MPI_INITIALIZED \ - { \ - throw ::gko::MpiInitialized(__FILE__, __LINE__, __func__); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -/** - * Throws when MPI has already been finalized. - * - */ -#define GKO_MPI_FINALIZED \ - { \ - throw ::gko::MpiFinalized(__FILE__, __LINE__, __func__); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - /** * Instantiates a CudaError. 
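With MpiInitialized and MpiFinalized gone, code that previously relied on those exceptions can query the MPI runtime directly. A small free-standing helper sketch (not part of the patch):

```cpp
#include <mpi.h>

// Returns true iff MPI_Init(_thread) has run and MPI_Finalize has not.
bool mpi_is_usable()
{
    int initialized = 0;
    int finalized = 0;
    MPI_Initialized(&initialized);
    MPI_Finalized(&finalized);
    return initialized != 0 && finalized == 0;
}
```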
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 77cb3e3e9b9..ab908cb229e 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -207,30 +207,21 @@ class init_finalize { init_finalize(int& argc, char**& argv, const thread_type thread_t = thread_type::serialized) { - auto flag = is_initialized(); - if (!flag) { - this->required_thread_support_ = static_cast(thread_t); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Init_thread(&argc, &argv, this->required_thread_support_, - &(this->provided_thread_support_))); - } else { - GKO_MPI_INITIALIZED; - } + this->required_thread_support_ = static_cast(thread_t); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Init_thread(&argc, &argv, this->required_thread_support_, + &(this->provided_thread_support_))); } init_finalize() = delete; - ~init_finalize() - { - auto flag = is_finalized(); - if (!flag) MPI_Finalize(); - } + ~init_finalize() { MPI_Finalize(); } + + int get_provided_thread_support() { return provided_thread_support_; } private: - int num_args_; int required_thread_support_; int provided_thread_support_; - char** args_; }; From 7f2cd8884b7ac616bf3b05be9bb8259a2d1c1e4b Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 16 Nov 2021 14:00:44 +0100 Subject: [PATCH 28/59] Make nb functions return requests --- core/test/mpi/base/bindings.cpp | 17 ++- include/ginkgo/core/base/mpi.hpp | 231 ++++++++++++++++--------------- 2 files changed, 132 insertions(+), 116 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index ccbd419e5c3..63a9efc6e6e 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -123,23 +123,28 @@ TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; ValueType* data; - auto req = gko::mpi::request::create(num_ranks); + std::vector req1; + MPI_Request req2; if (my_rank == 0) { data = new ValueType[4]{1, 2, 3, 4}; send_array = gko::Array{ref, gko::Array(ref, 4, data)}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::send(send_array.get_data(), 4, rank, - 40 + rank, req, comm); + req1.emplace_back(gko::mpi::i_send( + send_array.get_data(), 4, rank, 40 + rank, comm)); } } } else { recv_array = gko::Array{ref, 4}; - gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, - req, comm); + req2 = std::move(gko::mpi::i_recv(recv_array.get_data(), 4, + 0, 40 + my_rank, comm)); + } + if (my_rank == 0) { + auto stat1 = gko::mpi::wait_all(req1); + } else { + auto stat2 = gko::mpi::wait(req2); } - gko::mpi::wait(req); if (my_rank != 0) { ASSERT_EQ(recv_array.get_data()[0], 1); ASSERT_EQ(recv_array.get_data()[1], 2); diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index ab908cb229e..697777cb827 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -273,35 +273,6 @@ class info { }; -/** - * A request class that takes in the given request and duplicates it - * for our purposes. As the class or object goes out of scope, the request - * is freed. 
- */
-class request : public EnableSharedCreateMethod<request> {
-public:
-    explicit request(const int size) : req_(new MPI_Request[size]) {}
-
-    request() : req_(new MPI_Request[1]) {}
-
-    void free(MPI_Request* req)
-    {
-        GKO_ASSERT_NO_MPI_ERRORS(MPI_Request_free(req));
-    }
-
-    ~request()
-    {
-        // this->free(this->req_);
-        delete[] req_;
-    }
-
-    MPI_Request* get() const { return req_; }
-
-private:
-    MPI_Request* req_;
-};
-
-
 /**
  * A status class that allows creation of MPI_Status and
  * frees the status array when it goes out of scope
@@ -484,11 +455,26 @@ inline void synchronize(const std::shared_ptr<communicator>& comm)
 * @param req The request to wait on.
 * @param status The status variable that can be queried.
 */
-inline void wait(std::shared_ptr<request> req,
-                 std::shared_ptr<status> status = {})
+inline MPI_Status wait(MPI_Request& req)
+{
+    MPI_Status status;
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(&req, &status));
+    return status;
+}
+
+
+/**
+ * Allows a rank to wait on all given request handles.
+ *
+ * @param req The requests to wait on.
+ *
+ * @return The status of each completed request.
+ */
+inline std::vector<MPI_Status> wait_all(std::vector<MPI_Request>& req)
 {
+    std::vector<MPI_Status> status(req.size());
     GKO_ASSERT_NO_MPI_ERRORS(
-        MPI_Wait(req->get(), status ? status->get() : MPI_STATUS_IGNORE));
+        MPI_Waitall(req.size(), req.data(), status.data()));
+    return status;
 }
 
 
@@ -627,24 +613,27 @@ void send(const SendType* send_buffer, const int send_count,
 
 
 /**
- * Send (Non-blocking) data from calling process to destination rank.
+ * Send (Non-blocking, Immediate return) data from calling process to
+ * destination rank.
 *
 * @param send_buffer the buffer to send
 * @param send_count the number of elements to send
 * @param destination_rank the rank to send the data to
 * @param send_tag the tag for the send call
- * @param req the request handle for the send call
 * @param comm the communicator
+ *
+ * @return the request handle for the send call
 */
 template <typename SendType>
-void send(const SendType* send_buffer, const int send_count,
-          const int destination_rank, const int send_tag,
-          std::shared_ptr<request> req,
-          std::shared_ptr<communicator> comm)
+MPI_Request i_send(const SendType* send_buffer, const int send_count,
+                   const int destination_rank, const int send_tag,
+                   std::shared_ptr<communicator> comm)
 {
+    MPI_Request req;
     GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count,
                                        get_type<SendType>(), destination_rank,
-                                       send_tag, comm->get(), req->get()));
+                                       send_tag, comm->get(), &req));
+    return req;
 }
 
 
@@ -669,7 +658,7 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank,
 
 
 /**
- * Receive data from source rank.
+ * Receive (Non-blocking, Immediate return) data from source rank.
* * @param recv_buffer the buffer to send * @param recv_count the number of elements to send @@ -677,15 +666,19 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, * @param recv_tag the tag for the send call * @param req the request handle for the send call * @param comm the communicator + * + * @return the request handle for the send call */ template -void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, - const int recv_tag, std::shared_ptr req, - std::shared_ptr comm) +MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag, + std::shared_ptr comm) { + MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, get_type(), source_rank, - recv_tag, comm->get(), req->get())); + recv_tag, comm->get(), &req)); + return req; } @@ -719,18 +712,19 @@ void put(const PutType* origin_buffer, const int origin_count, * @param target_disp the displacement at the target window * @param target_count the request handle for the send call * @param window the window to put the data into - * @param req the request handle + * + * @return the request handle for the send call */ template -void put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window, - std::shared_ptr req) +MPI_Request r_put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Rput(origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window.get(), - req->get())); + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( + origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window.get(), &req)); + return req; } @@ -756,7 +750,7 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, /** - * Get data from the target window. + * Get data (with handle) from the target window. * * @param origin_buffer the buffer to send * @param origin_count the number of elements to get @@ -764,16 +758,19 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, * @param target_disp the displacement at the target window * @param target_count the request handle for the send call * @param window the window to put the data into - * @param req the request handle + * + * @return the request handle for the send call */ template -void get(GetType* origin_buffer, const int origin_count, const int target_rank, - const unsigned int target_disp, const int target_count, - window& window, std::shared_ptr req) +MPI_Request r_get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( - origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window, req->get())); + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Rget(origin_buffer, origin_count, get_type(), target_rank, + target_disp, target_count, get_type(), window, &req)); + return req; } @@ -822,17 +819,19 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, * @param count the number of elements to reduce * @param operation the MPI_Op type reduce operation. 
* @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - MPI_Op operation, int root_rank, - std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, int root_rank, + std::shared_ptr comm) { + MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count, get_type(), operation, - root_rank, comm->get(), req->get())); + root_rank, comm->get(), &req)); + return req; } @@ -863,16 +862,18 @@ void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, * @param count the number of elements to reduce * @param operation the reduce operation. See @MPI_Op * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, - std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, + std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( - detail::in_place(), recv_buffer, count, - get_type(), operation, comm->get(), req->get())); + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Iallreduce(detail::in_place(), recv_buffer, count, + get_type(), operation, comm->get(), &req)); + return req; } @@ -885,7 +886,6 @@ void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, * @param count the number of elements to reduce * @param operation the reduce operation. See @MPI_Op * @param comm the communicator - * @param req the request handle */ template void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, @@ -907,17 +907,19 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, * @param count the number of elements to reduce * @param operation the reduce operation. See @MPI_Op * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, + std::shared_ptr comm) { + MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, get_type(), operation, - comm->get(), req->get())); + comm->get(), &req)); + return req; } @@ -1056,20 +1058,21 @@ void all_to_all(RecvType* recv_buffer, const int recv_count, * @param buffer the buffer to send and the buffer receive * @param recv_count the number of elements to receive * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call * * @note This overload uses MPI_IN_PLACE and the source and destination buffers * are the same. 
*/ template -void all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Ialltoall(detail::in_place(), recv_count, - get_type(), recv_buffer, recv_count, - get_type(), comm->get(), req->get())); + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( + detail::in_place(), recv_count, get_type(), + recv_buffer, recv_count, get_type(), comm->get(), &req)); + return req; } @@ -1103,17 +1106,19 @@ void all_to_all(const SendType* send_buffer, const int send_count, * @param recv_buffer the buffer to receive * @param recv_count the number of elements to receive * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { + MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( send_buffer, send_count, get_type(), recv_buffer, recv_count, - get_type(), comm->get(), req->get())); + get_type(), comm->get(), &req)); + return req; } @@ -1155,19 +1160,22 @@ void all_to_all_v(const SendType* send_buffer, const int* send_counts, * @param recv_offsets the offsets for the recv buffer * @param stride the stride to be used in case of sending concatenated data * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void all_to_all_v(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const int stride, std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets, + const int stride, + std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( - send_buffer, send_counts, send_offsets, get_type(), - recv_buffer, recv_counts, recv_offsets, get_type(), - comm->get(), req->get())); + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Ialltoallv(send_buffer, send_counts, send_offsets, + get_type(), recv_buffer, recv_counts, + recv_offsets, get_type(), comm->get(), &req)); + return req; } @@ -1201,16 +1209,19 @@ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, * @param recv_count the number of elements to scan * @param operation the operation type to be used for the scan. 
See @MPI_Op * @param comm the communicator - * @param req the request handle + * + * @return the request handle for the call */ template -void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - MPI_Op operation, std::shared_ptr comm, - std::shared_ptr req) +MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, + int count, MPI_Op operation, + std::shared_ptr comm) { + MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, get_type(), operation, - comm->get(), req->get())); + comm->get(), &req)); + return req; } From cf965adc333e6cce5fcce554788693500e1223f5 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 16 Nov 2021 14:16:01 +0100 Subject: [PATCH 29/59] Some test updates --- core/test/mpi/base/bindings.cpp | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 63a9efc6e6e..5f3923b78de 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -70,14 +70,13 @@ TEST_F(MpiBindings, CanSetADefaultWindow) TEST_F(MpiBindings, CanCreateWindow) { using ValueType = int; - ValueType* data; - data = new ValueType[4]{1, 2, 3, 4}; + auto data = std::vector{1, 2, 3, 4}; auto comm = gko::mpi::communicator::create_world(); - auto win = gko::mpi::window(data, 4 * sizeof(ValueType), comm); + auto win = + gko::mpi::window(data.data(), 4 * sizeof(ValueType), comm); ASSERT_NE(win.get(), MPI_WIN_NULL); win.lock_all(); win.unlock_all(); - delete data; } @@ -87,17 +86,13 @@ TEST_F(MpiBindings, CanSendAndRecvValues) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; auto recv_array = gko::Array{ref}; - ValueType* data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; - send_array = - gko::Array{ref, gko::Array(ref, 4, data)}; + auto send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::send(send_array.get_const_data(), 4, rank, - 40 + rank, comm); + gko::mpi::send(send_array.data(), 4, rank, 40 + rank, + comm); } } } else { @@ -120,19 +115,17 @@ TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; + std::vector send_array; auto recv_array = gko::Array{ref}; ValueType* data; std::vector req1; MPI_Request req2; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; - send_array = - gko::Array{ref, gko::Array(ref, 4, data)}; + send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { req1.emplace_back(gko::mpi::i_send( - send_array.get_data(), 4, rank, 40 + rank, comm)); + send_array.data(), 4, rank, 40 + rank, comm)); } } } else { From 1c681bba9858619b61d94c727545451358eecb73 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 16 Nov 2021 16:54:54 +0100 Subject: [PATCH 30/59] Update tests to TYPED_TESTS --- core/test/mpi/base/bindings.cpp | 620 +++++++++++++++----------------- core/test/utils.hpp | 14 + 2 files changed, 313 insertions(+), 321 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 5f3923b78de..13fdb3d6d60 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -43,14 +43,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 
DAMAGE. #include +#include "core/test/utils.hpp" + + +template class MpiBindings : public ::testing::Test { protected: + using value_type = T; MpiBindings() : ref(gko::ReferenceExecutor::create()) {} std::shared_ptr ref; - void assert_equal_arrays(gko::Array& array_1, - gko::Array& array_2) + void assert_equal_vectors(std::vector& vec_1, + std::vector& vec_2) + { + ASSERT_EQ(vec_1.size(), vec_2.size()); + for (auto i = 0; i < vec_1.size(); ++i) { + EXPECT_EQ(vec_1[i], vec_2[i]); + } + } + + void assert_equal_arrays(gko::Array& array_1, + gko::Array& array_2) { ASSERT_EQ(array_1.get_num_elems(), array_2.get_num_elems()); for (gko::size_type i = 0; i < array_1.get_num_elems(); ++i) { @@ -59,78 +73,75 @@ class MpiBindings : public ::testing::Test { } }; +TYPED_TEST_SUITE(MpiBindings, gko::test::PODTypes, TypenameNameGenerator); + -TEST_F(MpiBindings, CanSetADefaultWindow) +TYPED_TEST(MpiBindings, CanSetADefaultWindow) { - gko::mpi::window win; + gko::mpi::window win; ASSERT_EQ(win.get(), MPI_WIN_NULL); } -TEST_F(MpiBindings, CanCreateWindow) +TYPED_TEST(MpiBindings, CanCreateWindow) { - using ValueType = int; - auto data = std::vector{1, 2, 3, 4}; + auto data = std::vector{1, 2, 3, 4}; auto comm = gko::mpi::communicator::create_world(); auto win = - gko::mpi::window(data.data(), 4 * sizeof(ValueType), comm); + gko::mpi::window(data.data(), 4 * sizeof(TypeParam), comm); ASSERT_NE(win.get(), MPI_WIN_NULL); win.lock_all(); win.unlock_all(); } -TEST_F(MpiBindings, CanSendAndRecvValues) +TYPED_TEST(MpiBindings, CanSendAndRecvValues) { - using ValueType = int; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto recv_array = gko::Array{ref}; + auto recv_array = gko::Array{this->ref}; if (my_rank == 0) { - auto send_array = std::vector{1, 2, 3, 4}; + auto send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::send(send_array.data(), 4, rank, 40 + rank, + gko::mpi::send(send_array.data(), 4, rank, 40 + rank, comm); } } } else { - recv_array = gko::Array{ref, 4}; - gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, + recv_array = gko::Array{this->ref, 4}; + gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm); } if (my_rank != 0) { - ASSERT_EQ(recv_array.get_data()[0], 1); - ASSERT_EQ(recv_array.get_data()[1], 2); - ASSERT_EQ(recv_array.get_data()[2], 3); - ASSERT_EQ(recv_array.get_data()[3], 4); + auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; + this->assert_equal_arrays(ref_array, recv_array); } } -TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) +TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) { - using ValueType = int; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - std::vector send_array; - auto recv_array = gko::Array{ref}; - ValueType* data; + std::vector send_array; + auto recv_array = gko::Array{this->ref}; + TypeParam* data; std::vector req1; MPI_Request req2; if (my_rank == 0) { - send_array = std::vector{1, 2, 3, 4}; + send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - req1.emplace_back(gko::mpi::i_send( + req1.emplace_back(gko::mpi::i_send( send_array.data(), 4, rank, 40 + rank, comm)); } } } else { - recv_array = gko::Array{ref, 4}; - req2 = std::move(gko::mpi::i_recv(recv_array.get_data(), 4, + recv_array = gko::Array{this->ref, 4}; + req2 = 
std::move(gko::mpi::i_recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm)); } if (my_rank == 0) { @@ -139,255 +150,228 @@ TEST_F(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) auto stat2 = gko::mpi::wait(req2); } if (my_rank != 0) { - ASSERT_EQ(recv_array.get_data()[0], 1); - ASSERT_EQ(recv_array.get_data()[1], 2); - ASSERT_EQ(recv_array.get_data()[2], 3); - ASSERT_EQ(recv_array.get_data()[3], 4); + auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; + this->assert_equal_arrays(ref_array, recv_array); } } -TEST_F(MpiBindings, CanPutValuesWithLockAll) +TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) { - using ValueType = int; - using window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = window(data, 4 * sizeof(ValueType), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); win.lock_all(); if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::put(data, 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); win.flush(rank); } } } win.unlock_all(); gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanPutValuesWithExclusiveLock) +TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) { - using ValueType = int; - using window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = window(data, 4 * sizeof(ValueType), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { win.lock(rank, 0, window::lock_type::exclusive); - gko::mpi::put(data, 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); win.flush(rank); win.unlock(rank); } } } gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanPutValuesWithFence) +TYPED_TEST(MpiBindings, CanPutValuesWithFence) { - using ValueType = int; - using window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = window(data, 4 * sizeof(ValueType), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); win.fence(); if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; 
++rank) { if (rank != my_rank) { - gko::mpi::put(data, 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); } } } win.fence(); gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanGetValuesWithLockAll) +TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) { - using ValueType = int; - using Window = gko::mpi::window; + using Window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = Window(data, 4 * sizeof(ValueType), comm); + auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); if (my_rank != 0) { win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::get(data, 4, 0, 0, 4, win); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); win.flush(0); } } win.unlock_all(); } gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanGetValuesWithExclusiveLock) +TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) { - using ValueType = int; - using Window = gko::mpi::window; + using Window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = Window(data, 4 * sizeof(ValueType), comm); + auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { win.lock(0, 0, Window::lock_type::exclusive); - gko::mpi::get(data, 4, 0, 0, 4, win); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); win.flush(0); win.unlock(0); } } } gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanGetValuesWithFence) +TYPED_TEST(MpiBindings, CanGetValuesWithFence) { - using ValueType = int; - using Window = gko::mpi::window; + using Window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - int* data; + std::vector data; if (my_rank == 0) { - data = new ValueType[4]{1, 2, 3, 4}; + data = std::vector{1, 2, 3, 4}; } else { - data = new ValueType[4]{0, 0, 0, 0}; + data = std::vector{0, 0, 0, 0}; } - auto win = Window(data, 4 * sizeof(ValueType), comm); + auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); win.fence(); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - 
gko::mpi::get(data, 4, 0, 0, 4, win); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); } } } win.fence(); gko::mpi::synchronize(comm); - ASSERT_EQ(data[0], 1); - ASSERT_EQ(data[1], 2); - ASSERT_EQ(data[2], 3); - ASSERT_EQ(data[3], 4); - delete data; + + auto ref = std::vector{1, 2, 3, 4}; + this->assert_equal_vectors(data, ref); } -TEST_F(MpiBindings, CanBroadcastValues) +TYPED_TEST(MpiBindings, CanBroadcastValues) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - double* data; - auto array = gko::Array{ref, 8}; + TypeParam* data; + auto array = gko::Array{this->ref, 8}; if (my_rank == 0) { // clang-format off - data = new double[8]{ 2.0, 3.0, 1.0, - 3.0,-1.0, 0.0 , 3.5, 1.5}; + data = new TypeParam[8]{ 2, 3, 1, + 3,-1, 0 , 3, 1}; // clang-format on - array = gko::Array{gko::Array::view(ref, 8, data)}; + array = gko::Array{ + gko::Array::view(this->ref, 8, data)}; } - gko::mpi::broadcast(array.get_data(), 8, 0, comm); + gko::mpi::broadcast(array.get_data(), 8, 0, comm); auto comp_data = array.get_data(); - ASSERT_EQ(comp_data[0], 2.0); - ASSERT_EQ(comp_data[1], 3.0); - ASSERT_EQ(comp_data[2], 1.0); - ASSERT_EQ(comp_data[3], 3.0); - ASSERT_EQ(comp_data[4], -1.0); - ASSERT_EQ(comp_data[5], 0.0); - ASSERT_EQ(comp_data[6], 3.5); - ASSERT_EQ(comp_data[7], 1.5); + ASSERT_EQ(comp_data[0], TypeParam{2}); + ASSERT_EQ(comp_data[1], TypeParam{3}); + ASSERT_EQ(comp_data[2], TypeParam{1}); + ASSERT_EQ(comp_data[3], TypeParam{3}); + ASSERT_EQ(comp_data[4], TypeParam{-1}); + ASSERT_EQ(comp_data[5], TypeParam{0}); + ASSERT_EQ(comp_data[6], TypeParam{3}); + ASSERT_EQ(comp_data[7], TypeParam{1}); if (my_rank == 0) { delete data; } } -TEST_F(MpiBindings, CanReduceValues) +TYPED_TEST(MpiBindings, CanReduceValues) { - using ValueType = double; + using TypeParam = TypeParam; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - ValueType data, sum, max, min; + TypeParam data, sum, max, min; if (my_rank == 0) { data = 3; } else if (my_rank == 1) { @@ -397,23 +381,23 @@ TEST_F(MpiBindings, CanReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); - gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); - gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); + gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); + gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); + gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); if (my_rank == 0) { - EXPECT_EQ(sum, 16.0); - EXPECT_EQ(max, 6.0); - EXPECT_EQ(min, 2.0); + EXPECT_EQ(sum, TypeParam{16}); + EXPECT_EQ(max, TypeParam{6}); + EXPECT_EQ(min, TypeParam{2}); } } -TEST_F(MpiBindings, CanAllReduceValues) +TYPED_TEST(MpiBindings, CanAllReduceValues) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - int data, sum; + TypeParam data, sum; if (my_rank == 0) { data = 3; } else if (my_rank == 1) { @@ -423,17 +407,17 @@ TEST_F(MpiBindings, CanAllReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); - ASSERT_EQ(sum, 16); + gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); + ASSERT_EQ(sum, TypeParam{16}); } -TEST_F(MpiBindings, CanAllReduceValuesInPlace) +TYPED_TEST(MpiBindings, CanAllReduceValuesInPlace) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - int data; + TypeParam data; if (my_rank == 0) { data = 3; } else if (my_rank 
== 1) { @@ -443,54 +427,55 @@ TEST_F(MpiBindings, CanAllReduceValuesInPlace) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); - ASSERT_EQ(data, 16); + gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); + ASSERT_EQ(data, TypeParam{16}); } -TEST_F(MpiBindings, CanScatterValues) +TYPED_TEST(MpiBindings, CanScatterValues) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - double* data; - auto scatter_from_array = gko::Array{ref->get_master()}; + TypeParam* data; + auto scatter_from_array = gko::Array{this->ref->get_master()}; if (my_rank == 0) { // clang-format off - data = new double[8]{ 2.0, 3.0, 1.0, - 3.0,-1.0, 0.0 , 3.5, 1.5}; + data = new TypeParam[8]{ 2, 3, 1, + 3,-1, 0 , 3, 1}; // clang-format on - scatter_from_array = gko::Array{ - ref->get_master(), gko::Array::view(ref, 8, data)}; - } - auto scatter_into_array = gko::Array{ref, 2}; - gko::mpi::scatter(scatter_from_array.get_data(), 2, - scatter_into_array.get_data(), 2, 0, - comm); + scatter_from_array = gko::Array{ + this->ref->get_master(), + gko::Array::view(this->ref, 8, data)}; + } + auto scatter_into_array = gko::Array{this->ref, 2}; + gko::mpi::scatter(scatter_from_array.get_data(), 2, + scatter_into_array.get_data(), 2, 0, + comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { - ASSERT_EQ(comp_data[0], 2.0); - ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[0], TypeParam{2}); + ASSERT_EQ(comp_data[1], TypeParam{3}); delete data; } else if (my_rank == 1) { - ASSERT_EQ(comp_data[0], 1.0); - ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[0], TypeParam{1}); + ASSERT_EQ(comp_data[1], TypeParam{3}); } else if (my_rank == 2) { - ASSERT_EQ(comp_data[0], -1.0); - ASSERT_EQ(comp_data[1], 0.0); + ASSERT_EQ(comp_data[0], TypeParam{-1}); + ASSERT_EQ(comp_data[1], TypeParam{0}); } else if (my_rank == 3) { - ASSERT_EQ(comp_data[0], 3.5); - ASSERT_EQ(comp_data[1], 1.5); + ASSERT_EQ(comp_data[0], TypeParam{3}); + ASSERT_EQ(comp_data[1], TypeParam{1}); } } -TEST_F(MpiBindings, CanGatherValues) +TYPED_TEST(MpiBindings, CanGatherValues) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - int data; + TypeParam data; if (my_rank == 0) { data = 3; } else if (my_rank == 1) { @@ -500,40 +485,41 @@ TEST_F(MpiBindings, CanGatherValues) } else if (my_rank == 3) { data = 6; } - auto gather_array = - gko::Array{ref, static_cast(num_ranks)}; - gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0, comm); + auto gather_array = gko::Array{ + this->ref, static_cast(num_ranks)}; + gko::mpi::gather(&data, 1, gather_array.get_data(), 1, + 0, comm); if (my_rank == 0) { - ASSERT_EQ(gather_array.get_data()[0], 3); - ASSERT_EQ(gather_array.get_data()[1], 5); - ASSERT_EQ(gather_array.get_data()[2], 2); - ASSERT_EQ(gather_array.get_data()[3], 6); + ASSERT_EQ(gather_array.get_data()[0], TypeParam{3}); + ASSERT_EQ(gather_array.get_data()[1], TypeParam{5}); + ASSERT_EQ(gather_array.get_data()[2], TypeParam{2}); + ASSERT_EQ(gather_array.get_data()[3], TypeParam{6}); } } -TEST_F(MpiBindings, CanScatterValuesWithDisplacements) +TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - double* data; - auto scatter_from_array = gko::Array{ref}; - auto scatter_into_array = gko::Array{ref}; - auto s_counts = gko::Array{ref->get_master(), + TypeParam* data; + auto 
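
The plain scatter and gather tests in this region rely on the contiguous-chunk semantics of the MPI collectives: with a count of 2, MPI_Scatter hands rank r elements [2r, 2r+1] of the root buffer, and MPI_Gather is the exact inverse. A raw-MPI sketch under the same four-rank assumption:

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        // Root buffer; two consecutive elements go to each of the four ranks.
        const int send[8] = {2, 3, 1, 3, -1, 0, 3, 1};
        int recv[2];
        MPI_Scatter(send, 2, MPI_INT, recv, 2, MPI_INT, 0, MPI_COMM_WORLD);
        std::printf("rank %d got {%d, %d}\n", rank, recv[0], recv[1]);
        MPI_Finalize();
    }
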
scatter_from_array = gko::Array{this->ref}; + auto scatter_into_array = gko::Array{this->ref}; + auto s_counts = gko::Array{this->ref->get_master(), static_cast(num_ranks)}; - auto displacements = gko::Array{ref->get_master()}; + auto displacements = gko::Array{this->ref->get_master()}; int nelems; if (my_rank == 0) { // clang-format off - data = new double[10]{ 2.0, 3.0, 1.0, - 3.0,-1.0, 0.0, - 2.5,-1.5, 0.5, 3.5}; + data = new TypeParam[10]{ 2, 3, 1, + 3,-1, 0, + 2,-1, 0, 3}; // clang-format on - scatter_from_array = - gko::Array{ref, gko::Array::view(ref, 10, data)}; + scatter_from_array = gko::Array{ + this->ref, gko::Array::view(this->ref, 10, data)}; nelems = 2; - displacements = gko::Array{ref, {0, 2, 6, 9}}; + displacements = gko::Array{this->ref, {0, 2, 6, 9}}; } else if (my_rank == 1) { nelems = 4; } else if (my_rank == 2) { @@ -542,88 +528,81 @@ TEST_F(MpiBindings, CanScatterValuesWithDisplacements) nelems = 1; } scatter_into_array = - gko::Array{ref, static_cast(nelems)}; + gko::Array{this->ref, static_cast(nelems)}; gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); - gko::mpi::scatter_v( + gko::mpi::scatter_v( scatter_from_array.get_data(), s_counts.get_data(), displacements.get_data(), scatter_into_array.get_data(), nelems, 0, comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { - ASSERT_EQ(comp_data[0], 2.0); - ASSERT_EQ(comp_data[1], 3.0); + ASSERT_EQ(comp_data[0], TypeParam{2}); + ASSERT_EQ(comp_data[1], TypeParam{3}); delete data; } else if (my_rank == 1) { - ASSERT_EQ(comp_data[0], 1.0); - ASSERT_EQ(comp_data[1], 3.0); - ASSERT_EQ(comp_data[2], -1.0); - ASSERT_EQ(comp_data[3], 0.0); + ASSERT_EQ(comp_data[0], TypeParam{1}); + ASSERT_EQ(comp_data[1], TypeParam{3}); + ASSERT_EQ(comp_data[2], TypeParam{-1}); + ASSERT_EQ(comp_data[3], TypeParam{0}); } else if (my_rank == 2) { - ASSERT_EQ(comp_data[0], 2.5); - ASSERT_EQ(comp_data[1], -1.5); - ASSERT_EQ(comp_data[2], 0.5); + ASSERT_EQ(comp_data[0], TypeParam{2}); + ASSERT_EQ(comp_data[1], TypeParam{-1}); + ASSERT_EQ(comp_data[2], TypeParam{0}); } else if (my_rank == 3) { - ASSERT_EQ(comp_data[0], 3.5); + ASSERT_EQ(comp_data[0], TypeParam{3}); } } -TEST_F(MpiBindings, CanGatherValuesWithDisplacements) +TYPED_TEST(MpiBindings, CanGatherValuesWithDisplacements) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - double* data; - auto gather_from_array = gko::Array{ref}; - auto gather_into_array = gko::Array{ref}; - auto r_counts = gko::Array{ref->get_master(), + TypeParam* data; + auto gather_from_array = gko::Array{this->ref}; + auto gather_into_array = gko::Array{this->ref}; + auto r_counts = gko::Array{this->ref->get_master(), static_cast(num_ranks)}; - auto displacements = gko::Array{ref->get_master()}; + auto displacements = gko::Array{this->ref->get_master()}; int nelems; if (my_rank == 0) { - data = new double[2]{2.0, 3.0}; - gather_from_array = gko::Array{ - ref->get_master(), - gko::Array::view(ref->get_master(), 2, data)}; + data = new TypeParam[2]{2, 3}; + gather_from_array = gko::Array{ + this->ref->get_master(), + gko::Array::view(this->ref->get_master(), 2, data)}; nelems = 2; - displacements = gko::Array{ref->get_master(), {0, 2, 6, 7}}; - gather_into_array = gko::Array{ref, 10}; + displacements = gko::Array{this->ref->get_master(), {0, 2, 6, 7}}; + gather_into_array = gko::Array{this->ref, 10}; } else if (my_rank == 1) { - data = new double[4]{1.5, 2.0, 1.0, 0.5}; + data = new TypeParam[4]{1, 2, 1, 0}; nelems = 4; - 
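
The displacement variant generalizes this: rank r receives counts[r] elements taken from offset displs[r] of the root buffer, so the chunks may be unequal and need not tile the buffer. A sketch with the same counts {2, 4, 3, 1} and displacements {0, 2, 6, 9} the test uses (run with exactly four ranks):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        // Root layout: counts[r] elements starting at displs[r] go to rank r.
        const int send[10] = {2, 3, 1, 3, -1, 0, 2, -1, 0, 3};
        const int counts[4] = {2, 4, 3, 1};
        const int displs[4] = {0, 2, 6, 9};
        int recv[4];  // sized for the largest chunk
        MPI_Scatterv(send, counts, displs, MPI_INT, recv, counts[rank], MPI_INT,
                     0, MPI_COMM_WORLD);
        // rank 0 -> {2, 3}, rank 1 -> {1, 3, -1, 0},
        // rank 2 -> {2, -1, 0}, rank 3 -> {3}
        std::printf("rank %d first element: %d\n", rank, recv[0]);
        MPI_Finalize();
    }
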
gather_from_array = gko::Array{ - ref->get_master(), - gko::Array::view(ref->get_master(), 4, data)}; + gather_from_array = gko::Array{ + this->ref->get_master(), + gko::Array::view(this->ref->get_master(), 4, data)}; } else if (my_rank == 2) { - data = new double[1]{1.0}; + data = new TypeParam[1]{1}; nelems = 1; - gather_from_array = gko::Array{ - ref->get_master(), - gko::Array::view(ref->get_master(), 1, data)}; + gather_from_array = gko::Array{ + this->ref->get_master(), + gko::Array::view(this->ref->get_master(), 1, data)}; } else if (my_rank == 3) { - data = new double[3]{1.9, -4.0, 5.0}; + data = new TypeParam[3]{1, -4, 5}; nelems = 3; - gather_from_array = gko::Array{ - ref->get_master(), - gko::Array::view(ref->get_master(), 3, data)}; + gather_from_array = gko::Array{ + this->ref->get_master(), + gko::Array::view(this->ref->get_master(), 3, data)}; } gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm); - gko::mpi::gather_v( + gko::mpi::gather_v( gather_from_array.get_data(), nelems, gather_into_array.get_data(), r_counts.get_data(), displacements.get_data(), 0, comm); auto comp_data = gather_into_array.get_data(); if (my_rank == 0) { - ASSERT_EQ(comp_data[0], 2.0); - ASSERT_EQ(comp_data[1], 3.0); - ASSERT_EQ(comp_data[2], 1.5); - ASSERT_EQ(comp_data[3], 2.0); - ASSERT_EQ(comp_data[4], 1.0); - ASSERT_EQ(comp_data[5], 0.5); - ASSERT_EQ(comp_data[6], 1.0); - ASSERT_EQ(comp_data[7], 1.9); - ASSERT_EQ(comp_data[8], -4.0); - ASSERT_EQ(comp_data[9], 5.0); + auto ref_array = + gko::Array(this->ref, {2, 3, 1, 2, 1, 0, 1, 1, -4, 5}); + this->assert_equal_arrays(gather_into_array, ref_array); } else { ASSERT_EQ(comp_data, nullptr); } @@ -631,109 +610,109 @@ TEST_F(MpiBindings, CanGatherValuesWithDisplacements) } -TEST_F(MpiBindings, AllToAllWorksCorrectly) +TYPED_TEST(MpiBindings, AllToAllWorksCorrectly) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - auto ref_array = gko::Array{ref}; - recv_array = gko::Array{ref, 4}; + auto send_array = gko::Array{this->ref}; + auto recv_array = gko::Array{this->ref}; + auto ref_array = gko::Array{this->ref}; + recv_array = gko::Array{this->ref, 4}; if (my_rank == 0) { - send_array = gko::Array(ref, {2.5, 3.0, 1.5, 2.0}); - ref_array = gko::Array(ref, {2.5, 2.5, 2.0, 5.5}); + send_array = gko::Array(this->ref, {2, 3, 1, 2}); + ref_array = gko::Array(this->ref, {2, 2, 2, 5}); } else if (my_rank == 1) { - send_array = gko::Array(ref, {2.5, 3.5, 1.0, 2.0}); - ref_array = gko::Array(ref, {3.0, 3.5, 3.0, 3.5}); + send_array = gko::Array(this->ref, {2, 3, 1, 2}); + ref_array = gko::Array(this->ref, {3, 3, 3, 3}); } else if (my_rank == 2) { - send_array = gko::Array(ref, {2.0, 3.0, 1.5, 0.0}); - ref_array = gko::Array(ref, {1.5, 1.0, 1.5, 3.5}); + send_array = gko::Array(this->ref, {2, 3, 1, 0}); + ref_array = gko::Array(this->ref, {1, 1, 1, 3}); } else if (my_rank == 3) { - send_array = gko::Array(ref, {5.5, 3.5, 3.5, -2.0}); - ref_array = gko::Array(ref, {2.0, 2.0, 0.0, -2.0}); + send_array = gko::Array(this->ref, {5, 3, 3, -2}); + ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(send_array.get_data(), 1, - recv_array.get_data(), 1, comm); + gko::mpi::all_to_all(send_array.get_data(), 1, + recv_array.get_data(), 1, comm); this->assert_equal_arrays(recv_array, ref_array); } -TEST_F(MpiBindings, AllToAllInPlaceWorksCorrectly) +TYPED_TEST(MpiBindings, AllToAllInPlaceWorksCorrectly) { 
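
MPI_Gatherv is the mirror operation: rank r contributes counts[r] elements and the root places them at offset displs[r] of its receive buffer, which is how the test assembles {2, 3, 1, 2, 1, 0, 1, 1, -4, 5}. A sketch with the same layout (exactly four ranks assumed):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        // Per-rank contributions and where they land in the root buffer:
        const int counts[4] = {2, 4, 1, 3};
        const int displs[4] = {0, 2, 6, 7};
        const int chunks[4][4] = {{2, 3}, {1, 2, 1, 0}, {1}, {1, -4, 5}};
        int gathered[10] = {};
        MPI_Gatherv(chunks[rank], counts[rank], MPI_INT, gathered, counts,
                    displs, MPI_INT, 0, MPI_COMM_WORLD);
        if (rank == 0) {
            for (int v : gathered) std::printf("%d ", v);  // 2 3 1 2 1 0 1 1 -4 5
            std::printf("\n");
        }
        MPI_Finalize();
    }
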
auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto recv_array = gko::Array{ref}; - auto ref_array = gko::Array{ref}; - recv_array = gko::Array{ref, 4}; + auto recv_array = gko::Array{this->ref}; + auto ref_array = gko::Array{this->ref}; + recv_array = gko::Array{this->ref, 4}; if (my_rank == 0) { - recv_array = gko::Array(ref, {2.5, 3.0, 1.5, 2.0}); - ref_array = gko::Array(ref, {2.5, 2.5, 2.0, 5.5}); + recv_array = gko::Array(this->ref, {2, 3, 1, 2}); + ref_array = gko::Array(this->ref, {2, 2, 2, 5}); } else if (my_rank == 1) { - recv_array = gko::Array(ref, {2.5, 3.5, 1.0, 2.0}); - ref_array = gko::Array(ref, {3.0, 3.5, 3.0, 3.5}); + recv_array = gko::Array(this->ref, {2, 3, 1, 2}); + ref_array = gko::Array(this->ref, {3, 3, 3, 3}); } else if (my_rank == 2) { - recv_array = gko::Array(ref, {2.0, 3.0, 1.5, 0.0}); - ref_array = gko::Array(ref, {1.5, 1.0, 1.5, 3.5}); + recv_array = gko::Array(this->ref, {2, 3, 1, 0}); + ref_array = gko::Array(this->ref, {1, 1, 1, 3}); } else if (my_rank == 3) { - recv_array = gko::Array(ref, {5.5, 3.5, 3.5, -2.0}); - ref_array = gko::Array(ref, {2.0, 2.0, 0.0, -2.0}); + recv_array = gko::Array(this->ref, {5, 3, 3, -2}); + ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(recv_array.get_data(), 1, comm); + gko::mpi::all_to_all(recv_array.get_data(), 1, comm); this->assert_equal_arrays(recv_array, ref_array); } -TEST_F(MpiBindings, AllToAllVWorksCorrectly) +TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) { auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - auto send_array = gko::Array{ref}; - auto recv_array = gko::Array{ref}; - auto ref_array = gko::Array{ref}; - auto scounts_array = gko::Array{ref}; - auto soffset_array = gko::Array{ref}; - auto rcounts_array = gko::Array{ref}; - auto roffset_array = gko::Array{ref}; + auto send_array = gko::Array{this->ref}; + auto recv_array = gko::Array{this->ref}; + auto ref_array = gko::Array{this->ref}; + auto scounts_array = gko::Array{this->ref}; + auto soffset_array = gko::Array{this->ref}; + auto rcounts_array = gko::Array{this->ref}; + auto roffset_array = gko::Array{this->ref}; if (my_rank == 0) { - recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; - send_array = gko::Array{ref, {2.5, 3.0, 1.5, 2.0}}; - scounts_array = gko::Array{ref, {1, 2, 1, 0}}; - rcounts_array = gko::Array{ref, {1, 2, 2, 1}}; - soffset_array = gko::Array{ref, {0, 1, 1, 0}}; - roffset_array = gko::Array{ref, {0, 1, 3, 5}}; - ref_array = gko::Array{ref, {2.5, 2.5, 3.5, 1.5, 2.4, 5.5}}; + recv_array = gko::Array{this->ref, {0, 0, 0, 0, 0, 0}}; + send_array = gko::Array{this->ref, {2, 3, 1, 2}}; + scounts_array = gko::Array{this->ref, {1, 2, 1, 0}}; + rcounts_array = gko::Array{this->ref, {1, 2, 2, 1}}; + soffset_array = gko::Array{this->ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{this->ref, {0, 1, 3, 5}}; + ref_array = gko::Array{this->ref, {2, 2, 3, 1, 2, 5}}; } else if (my_rank == 1) { - recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; - send_array = gko::Array{ref, {2.5, 3.5, 1.0, 2.0}}; - scounts_array = gko::Array{ref, {2, 2, 1, 2}}; - rcounts_array = gko::Array{ref, {2, 2, 2, 0}}; - soffset_array = gko::Array{ref, {0, 1, 1, 0}}; - roffset_array = gko::Array{ref, {0, 2, 4, 5}}; - ref_array = gko::Array{ref, {3.0, 1.5, 3.5, 1.0, 3.0, 1.5}}; + recv_array = gko::Array{this->ref, {0, 0, 0, 0, 0, 0}}; + send_array = gko::Array{this->ref, {2, 3, 1, 2}}; + 
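
The all-to-all tests read most naturally as a matrix transpose: with a count of 1, element j of rank i's send buffer ends up in slot i of rank j's receive buffer. A sketch using the integer-rounded data of the test (exactly four ranks assumed):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        // Row r is what rank r sends; after the call, rank r holds column r.
        const int rows[4][4] = {
            {2, 3, 1, 2}, {2, 3, 1, 2}, {2, 3, 1, 0}, {5, 3, 3, -2}};
        int recv[4];
        MPI_Alltoall(rows[rank], 1, MPI_INT, recv, 1, MPI_INT, MPI_COMM_WORLD);
        // e.g. rank 0 ends with {2, 2, 2, 5}, column 0 of the matrix.
        std::printf("rank %d recv[0] = %d\n", rank, recv[0]);
        MPI_Finalize();
    }
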
scounts_array = gko::Array{this->ref, {2, 2, 1, 2}}; + rcounts_array = gko::Array{this->ref, {2, 2, 2, 0}}; + soffset_array = gko::Array{this->ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{this->ref, {0, 2, 4, 5}}; + ref_array = gko::Array{this->ref, {3, 1, 3, 1, 3, 1}}; } else if (my_rank == 2) { - recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0}}; - send_array = gko::Array{ref, {2.0, 3.0, 1.5, 2.4}}; - scounts_array = gko::Array{ref, {2, 2, 1, 1}}; - rcounts_array = gko::Array{ref, {1, 1, 1, 1}}; - soffset_array = gko::Array{ref, {2, 1, 1, 1}}; - roffset_array = gko::Array{ref, {0, 1, 2, 3}}; - ref_array = gko::Array{ref, {3.0, 3.5, 3.0, 3.5}}; + recv_array = gko::Array{this->ref, {0, 0, 0, 0}}; + send_array = gko::Array{this->ref, {2, 3, 1, 2}}; + scounts_array = gko::Array{this->ref, {2, 2, 1, 1}}; + rcounts_array = gko::Array{this->ref, {1, 1, 1, 1}}; + soffset_array = gko::Array{this->ref, {2, 1, 1, 1}}; + roffset_array = gko::Array{this->ref, {0, 1, 2, 3}}; + ref_array = gko::Array{this->ref, {3, 3, 3, 3}}; } else if (my_rank == 3) { - recv_array = gko::Array{ref, {0.0, 0.0, 0.0, 0.0}}; - send_array = gko::Array{ref, {5.5, 3.5, 3.5, -2.0}}; - scounts_array = gko::Array{ref, {1, 0, 1, 0}}; - rcounts_array = gko::Array{ref, {0, 2, 1, 0}}; - soffset_array = gko::Array{ref, {0, 1, 1, 0}}; - roffset_array = gko::Array{ref, {0, 1, 3, 3}}; - ref_array = gko::Array{ref, {0.0, 2.5, 3.5, 3.0}}; + recv_array = gko::Array{this->ref, {0, 0, 0, 0}}; + send_array = gko::Array{this->ref, {5, 3, 3, -2}}; + scounts_array = gko::Array{this->ref, {1, 0, 1, 0}}; + rcounts_array = gko::Array{this->ref, {0, 2, 1, 0}}; + soffset_array = gko::Array{this->ref, {0, 1, 1, 0}}; + roffset_array = gko::Array{this->ref, {0, 1, 3, 3}}; + ref_array = gko::Array{this->ref, {0, 2, 3, 3}}; } - gko::mpi::all_to_all_v( + gko::mpi::all_to_all_v( send_array.get_data(), scounts_array.get_data(), soffset_array.get_data(), recv_array.get_data(), rcounts_array.get_data(), roffset_array.get_data(), {}, comm); @@ -741,13 +720,12 @@ TEST_F(MpiBindings, AllToAllVWorksCorrectly) } -TEST_F(MpiBindings, CanScanValues) +TYPED_TEST(MpiBindings, CanScanValues) { - using ValueType = double; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - ValueType data, sum, max, min; + TypeParam data, sum, max, min; if (my_rank == 0) { data = 3; } else if (my_rank == 1) { @@ -757,24 +735,24 @@ TEST_F(MpiBindings, CanScanValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm); - gko::mpi::scan(&data, &max, 1, MPI_MAX, comm); - gko::mpi::scan(&data, &min, 1, MPI_MIN, comm); + gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm); + gko::mpi::scan(&data, &max, 1, MPI_MAX, comm); + gko::mpi::scan(&data, &min, 1, MPI_MIN, comm); if (my_rank == 0) { - EXPECT_EQ(sum, 3.0); - EXPECT_EQ(max, 3.0); - EXPECT_EQ(min, 3.0); + EXPECT_EQ(sum, TypeParam{3}); + EXPECT_EQ(max, TypeParam{3}); + EXPECT_EQ(min, TypeParam{3}); } else if (my_rank == 1) { - EXPECT_EQ(sum, 8.0); - EXPECT_EQ(max, 5.0); - EXPECT_EQ(min, 3.0); + EXPECT_EQ(sum, TypeParam{8}); + EXPECT_EQ(max, TypeParam{5}); + EXPECT_EQ(min, TypeParam{3}); } else if (my_rank == 2) { - EXPECT_EQ(sum, 10.0); - EXPECT_EQ(max, 5.0); - EXPECT_EQ(min, 2.0); + EXPECT_EQ(sum, TypeParam{10}); + EXPECT_EQ(max, TypeParam{5}); + EXPECT_EQ(min, TypeParam{2}); } else if (my_rank == 3) { - EXPECT_EQ(sum, 16.0); - EXPECT_EQ(max, 6.0); - EXPECT_EQ(min, 2.0); + EXPECT_EQ(sum, TypeParam{16}); + EXPECT_EQ(max, TypeParam{6}); + 
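
The count and offset arrays in this test all satisfy the one invariant MPI_Alltoallv requires: the block rank i sends to rank j (scounts[j] entries starting at soffset[j]) must be exactly what j expects from i (rcounts[i] entries placed at roffset[i]). A self-contained sketch where the counts are derived rather than hand-picked:

    #include <mpi.h>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        // Rank r sends (r + 1) copies of r to every peer, so every receiver
        // expects (j + 1) elements from rank j; offsets are prefix sums.
        std::vector<int> scounts(size, rank + 1), soffs(size, 0);
        std::vector<int> rcounts(size), roffs(size, 0);
        for (int j = 0; j < size; ++j) rcounts[j] = j + 1;
        for (int j = 1; j < size; ++j) {
            soffs[j] = soffs[j - 1] + scounts[j - 1];
            roffs[j] = roffs[j - 1] + rcounts[j - 1];
        }
        std::vector<int> send(size * (rank + 1), rank);
        std::vector<int> recv(roffs[size - 1] + rcounts[size - 1]);
        MPI_Alltoallv(send.data(), scounts.data(), soffs.data(), MPI_INT,
                      recv.data(), rcounts.data(), roffs.data(), MPI_INT,
                      MPI_COMM_WORLD);
        // recv now holds one 0, two 1s, three 2s, ... in rank order.
        MPI_Finalize();
    }
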
EXPECT_EQ(min, TypeParam{2}); } } diff --git a/core/test/utils.hpp b/core/test/utils.hpp index c1c82d12b00..2bcae20d065 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -75,9 +75,23 @@ using ComplexValueTypes = ::testing::Types, std::complex>; #endif +using RealValueTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types; +#else + ::testing::Types; +#endif + using IndexTypes = ::testing::Types; +using PODTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types; +#else + ::testing::Types; +#endif + using ValueAndIndexTypes = #if GINKGO_DPCPP_SINGLE_MODE From f4b89ef632db731db4560052f5828df257f547b4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 17 Nov 2021 10:13:15 +0100 Subject: [PATCH 31/59] Review update. Co-authored-by: Aditya Kashi Co-authored-by: Tobias Ribizel --- core/test/mpi/base/bindings.cpp | 220 +++++++++++-------------------- include/ginkgo/core/base/mpi.hpp | 212 +++++++++++++++-------------- 2 files changed, 189 insertions(+), 243 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 13fdb3d6d60..adb2a74e2a8 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -53,37 +53,19 @@ class MpiBindings : public ::testing::Test { MpiBindings() : ref(gko::ReferenceExecutor::create()) {} std::shared_ptr ref; - - void assert_equal_vectors(std::vector& vec_1, - std::vector& vec_2) - { - ASSERT_EQ(vec_1.size(), vec_2.size()); - for (auto i = 0; i < vec_1.size(); ++i) { - EXPECT_EQ(vec_1[i], vec_2[i]); - } - } - - void assert_equal_arrays(gko::Array& array_1, - gko::Array& array_2) - { - ASSERT_EQ(array_1.get_num_elems(), array_2.get_num_elems()); - for (gko::size_type i = 0; i < array_1.get_num_elems(); ++i) { - EXPECT_EQ(array_1.get_const_data()[i], array_2.get_const_data()[i]); - } - } }; TYPED_TEST_SUITE(MpiBindings, gko::test::PODTypes, TypenameNameGenerator); -TYPED_TEST(MpiBindings, CanSetADefaultWindow) +TYPED_TEST(MpiBindings, CanSetADefaultwindow) { gko::mpi::window win; ASSERT_EQ(win.get(), MPI_WIN_NULL); } -TYPED_TEST(MpiBindings, CanCreateWindow) +TYPED_TEST(MpiBindings, CanCreatewindow) { auto data = std::vector{1, 2, 3, 4}; auto comm = gko::mpi::communicator::create_world(); @@ -105,18 +87,16 @@ TYPED_TEST(MpiBindings, CanSendAndRecvValues) auto send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::send(send_array.data(), 4, rank, 40 + rank, - comm); + gko::mpi::send(send_array.data(), 4, rank, 40 + rank, comm); } } } else { recv_array = gko::Array{this->ref, 4}; - gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, - comm); + gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm); } if (my_rank != 0) { auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; - this->assert_equal_arrays(ref_array, recv_array); + GKO_ASSERT_ARRAY_EQ(ref_array, recv_array); } } @@ -135,23 +115,21 @@ TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - req1.emplace_back(gko::mpi::i_send( - send_array.data(), 4, rank, 40 + rank, comm)); + req1.emplace_back(gko::mpi::i_send(send_array.data(), 4, rank, + 40 + rank, comm)); } } } else { recv_array = gko::Array{this->ref, 4}; - req2 = std::move(gko::mpi::i_recv(recv_array.get_data(), 4, - 0, 40 + my_rank, comm)); + req2 = std::move( + gko::mpi::i_recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm)); } if (my_rank == 0) { auto stat1 = 
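
The TEST_F to TYPED_TEST conversion running through this file is standard GoogleTest typed-test machinery; it is also why every fixture member is now reached through this->, since inside a typed test the fixture is a dependent base class and unqualified lookup does not see its members. A reduced sketch with an illustrative type list rather than the real gko::test::PODTypes (assumes linking against gtest_main):

    #include <gtest/gtest.h>

    template <typename T>
    class TypedSuite : public ::testing::Test {
    protected:
        T member_{};  // reached as this->member_ inside TYPED_TEST bodies
    };

    using SampleTypes = ::testing::Types<int, long, float, double>;
    TYPED_TEST_SUITE(TypedSuite, SampleTypes);

    // Instantiated once per type in SampleTypes; TypeParam is the current one.
    TYPED_TEST(TypedSuite, StartsValueInitialized)
    {
        ASSERT_EQ(this->member_, TypeParam{});
    }
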
gko::mpi::wait_all(req1); } else { auto stat2 = gko::mpi::wait(req2); - } - if (my_rank != 0) { auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; - this->assert_equal_arrays(ref_array, recv_array); + GKO_ASSERT_ARRAY_EQ(ref_array, recv_array); } } @@ -173,16 +151,15 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); win.flush(rank); } } } win.unlock_all(); - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } @@ -203,16 +180,15 @@ TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { win.lock(rank, 0, window::lock_type::exclusive); - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); win.flush(rank); win.unlock(rank); } } } - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } @@ -233,21 +209,20 @@ TYPED_TEST(MpiBindings, CanPutValuesWithFence) if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + gko::mpi::put(data.data(), 4, rank, 0, 4, win); } } } win.fence(); - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) { - using Window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); @@ -257,27 +232,26 @@ TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) } else { data = std::vector{0, 0, 0, 0}; } - auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); if (my_rank != 0) { win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); win.flush(0); } } win.unlock_all(); } - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) { - using Window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); @@ -287,27 +261,26 @@ TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) } else { data = std::vector{0, 0, 0, 0}; } - auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - win.lock(0, 0, Window::lock_type::exclusive); - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + win.lock(0, 0, window::lock_type::exclusive); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); win.flush(0); win.unlock(0); } } } - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } TYPED_TEST(MpiBindings, CanGetValuesWithFence) { - using Window = gko::mpi::window; + using window = gko::mpi::window; auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); 
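
The window tests around here exercise passive-target synchronization: lock_all opens an access epoch to all ranks at once, flush forces completion of the outstanding transfers to one target, and unlock_all closes the epoch. A raw-MPI sketch of the put-with-lock-all pattern (two or more ranks assumed; the trailing barrier stands in for the synchronize call the tests originally used):

    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        int data[4] = {0, 0, 0, 0};
        if (rank == 0) {
            data[0] = 1; data[1] = 2; data[2] = 3; data[3] = 4;
        }
        MPI_Win win;
        MPI_Win_create(data, sizeof(data), sizeof(int), MPI_INFO_NULL,
                       MPI_COMM_WORLD, &win);
        if (rank == 0) {
            MPI_Win_lock_all(0, win);        // access epoch to all targets
            for (int target = 1; target < size; ++target) {
                MPI_Put(data, 4, MPI_INT, target, 0, 4, MPI_INT, win);
                MPI_Win_flush(target, win);  // complete transfers to one target
            }
            MPI_Win_unlock_all(win);
        }
        MPI_Barrier(MPI_COMM_WORLD);  // targets read only after the epoch closes
        MPI_Win_free(&win);
        MPI_Finalize();
    }
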
auto num_ranks = comm->size(); @@ -317,20 +290,19 @@ TYPED_TEST(MpiBindings, CanGetValuesWithFence) } else { data = std::vector{0, 0, 0, 0}; } - auto win = Window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4 * sizeof(TypeParam), comm); win.fence(); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + gko::mpi::get(data.data(), 4, 0, 0, 4, win); } } } win.fence(); - gko::mpi::synchronize(comm); auto ref = std::vector{1, 2, 3, 4}; - this->assert_equal_vectors(data, ref); + ASSERT_EQ(data, ref); } @@ -339,17 +311,11 @@ TYPED_TEST(MpiBindings, CanBroadcastValues) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - TypeParam* data; auto array = gko::Array{this->ref, 8}; if (my_rank == 0) { - // clang-format off - data = new TypeParam[8]{ 2, 3, 1, - 3,-1, 0 , 3, 1}; - // clang-format on - array = gko::Array{ - gko::Array::view(this->ref, 8, data)}; - } - gko::mpi::broadcast(array.get_data(), 8, 0, comm); + array = gko::Array(this->ref, {2, 3, 1, 3, -1, 0, 3, 1}); + } + gko::mpi::broadcast(array.get_data(), 8, 0, comm); auto comp_data = array.get_data(); ASSERT_EQ(comp_data[0], TypeParam{2}); ASSERT_EQ(comp_data[1], TypeParam{3}); @@ -359,9 +325,6 @@ TYPED_TEST(MpiBindings, CanBroadcastValues) ASSERT_EQ(comp_data[5], TypeParam{0}); ASSERT_EQ(comp_data[6], TypeParam{3}); ASSERT_EQ(comp_data[7], TypeParam{1}); - if (my_rank == 0) { - delete data; - } } @@ -381,9 +344,9 @@ TYPED_TEST(MpiBindings, CanReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); - gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); - gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); + gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); + gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); + gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); if (my_rank == 0) { EXPECT_EQ(sum, TypeParam{16}); EXPECT_EQ(max, TypeParam{6}); @@ -407,7 +370,7 @@ TYPED_TEST(MpiBindings, CanAllReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); + gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); ASSERT_EQ(sum, TypeParam{16}); } @@ -427,7 +390,7 @@ TYPED_TEST(MpiBindings, CanAllReduceValuesInPlace) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); + gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); ASSERT_EQ(data, TypeParam{16}); } @@ -437,26 +400,19 @@ TYPED_TEST(MpiBindings, CanScatterValues) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - TypeParam* data; - auto scatter_from_array = gko::Array{this->ref->get_master()}; + auto scatter_from_array = gko::Array{this->ref}; if (my_rank == 0) { - // clang-format off - data = new TypeParam[8]{ 2, 3, 1, - 3,-1, 0 , 3, 1}; - // clang-format on - scatter_from_array = gko::Array{ - this->ref->get_master(), - gko::Array::view(this->ref, 8, data)}; + scatter_from_array = + gko::Array{this->ref, {2, 3, 1, 3, -1, 0, 3, 1}}; } auto scatter_into_array = gko::Array{this->ref, 2}; - gko::mpi::scatter(scatter_from_array.get_data(), 2, - scatter_into_array.get_data(), 2, 0, - comm); + gko::mpi::scatter(scatter_from_array.get_data(), 2, + scatter_into_array.get_data(), 2, 0, comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], TypeParam{2}); ASSERT_EQ(comp_data[1], TypeParam{3}); - 
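
Fence synchronization is the active-target counterpart used by the remaining window tests: a collective MPI_Win_fence opens an epoch on every rank and the next fence closes it, at which point all transfers issued in between are complete. A sketch of the get-with-fence pattern:

    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        int data[4] = {0, 0, 0, 0};
        if (rank == 0) {
            data[0] = 1; data[1] = 2; data[2] = 3; data[3] = 4;
        }
        MPI_Win win;
        MPI_Win_create(data, sizeof(data), sizeof(int), MPI_INFO_NULL,
                       MPI_COMM_WORLD, &win);
        MPI_Win_fence(0, win);  // opens the epoch collectively
        if (rank != 0) {
            // Read rank 0's window into the local buffer.
            MPI_Get(data, 4, MPI_INT, 0, 0, 4, MPI_INT, win);
        }
        MPI_Win_fence(0, win);  // closes the epoch; every rank now holds 1 2 3 4
        MPI_Win_free(&win);
        MPI_Finalize();
    }
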
delete data; + } else if (my_rank == 1) { ASSERT_EQ(comp_data[0], TypeParam{1}); ASSERT_EQ(comp_data[1], TypeParam{3}); @@ -487,8 +443,7 @@ TYPED_TEST(MpiBindings, CanGatherValues) } auto gather_array = gko::Array{ this->ref, static_cast(num_ranks)}; - gko::mpi::gather(&data, 1, gather_array.get_data(), 1, - 0, comm); + gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0, comm); if (my_rank == 0) { ASSERT_EQ(gather_array.get_data()[0], TypeParam{3}); ASSERT_EQ(gather_array.get_data()[1], TypeParam{5}); @@ -503,7 +458,6 @@ TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - TypeParam* data; auto scatter_from_array = gko::Array{this->ref}; auto scatter_into_array = gko::Array{this->ref}; auto s_counts = gko::Array{this->ref->get_master(), @@ -511,13 +465,8 @@ TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) auto displacements = gko::Array{this->ref->get_master()}; int nelems; if (my_rank == 0) { - // clang-format off - data = new TypeParam[10]{ 2, 3, 1, - 3,-1, 0, - 2,-1, 0, 3}; - // clang-format on - scatter_from_array = gko::Array{ - this->ref, gko::Array::view(this->ref, 10, data)}; + scatter_from_array = + gko::Array{this->ref, {2, 3, 1, 3, -1, 0, 2, -1, 0, 3}}; nelems = 2; displacements = gko::Array{this->ref, {0, 2, 6, 9}}; } else if (my_rank == 1) { @@ -529,16 +478,15 @@ TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) } scatter_into_array = gko::Array{this->ref, static_cast(nelems)}; - gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); - gko::mpi::scatter_v( - scatter_from_array.get_data(), s_counts.get_data(), - displacements.get_data(), scatter_into_array.get_data(), nelems, 0, - comm); + gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); + gko::mpi::scatter_v(scatter_from_array.get_data(), s_counts.get_data(), + displacements.get_data(), scatter_into_array.get_data(), + nelems, 0, comm); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], TypeParam{2}); ASSERT_EQ(comp_data[1], TypeParam{3}); - delete data; + } else if (my_rank == 1) { ASSERT_EQ(comp_data[0], TypeParam{1}); ASSERT_EQ(comp_data[1], TypeParam{3}); @@ -559,54 +507,40 @@ TYPED_TEST(MpiBindings, CanGatherValuesWithDisplacements) auto comm = gko::mpi::communicator::create_world(); auto my_rank = comm->rank(); auto num_ranks = comm->size(); - TypeParam* data; auto gather_from_array = gko::Array{this->ref}; auto gather_into_array = gko::Array{this->ref}; - auto r_counts = gko::Array{this->ref->get_master(), - static_cast(num_ranks)}; - auto displacements = gko::Array{this->ref->get_master()}; + auto r_counts = + gko::Array{this->ref, static_cast(num_ranks)}; + auto displacements = gko::Array{this->ref}; int nelems; if (my_rank == 0) { - data = new TypeParam[2]{2, 3}; - gather_from_array = gko::Array{ - this->ref->get_master(), - gko::Array::view(this->ref->get_master(), 2, data)}; + gather_from_array = gko::Array{this->ref, {2, 3}}; nelems = 2; - displacements = gko::Array{this->ref->get_master(), {0, 2, 6, 7}}; + displacements = gko::Array{this->ref, {0, 2, 6, 7}}; gather_into_array = gko::Array{this->ref, 10}; } else if (my_rank == 1) { - data = new TypeParam[4]{1, 2, 1, 0}; nelems = 4; - gather_from_array = gko::Array{ - this->ref->get_master(), - gko::Array::view(this->ref->get_master(), 4, data)}; + gather_from_array = gko::Array{this->ref, {1, 2, 1, 0}}; } else if (my_rank == 2) { - data = new TypeParam[1]{1}; 
nelems = 1; - gather_from_array = gko::Array{ - this->ref->get_master(), - gko::Array::view(this->ref->get_master(), 1, data)}; + gather_from_array = gko::Array{this->ref, {1}}; } else if (my_rank == 3) { - data = new TypeParam[3]{1, -4, 5}; nelems = 3; - gather_from_array = gko::Array{ - this->ref->get_master(), - gko::Array::view(this->ref->get_master(), 3, data)}; + gather_from_array = gko::Array{this->ref, {1, -4, 5}}; } - gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm); - gko::mpi::gather_v( - gather_from_array.get_data(), nelems, gather_into_array.get_data(), - r_counts.get_data(), displacements.get_data(), 0, comm); + gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm); + gko::mpi::gather_v(gather_from_array.get_data(), nelems, + gather_into_array.get_data(), r_counts.get_data(), + displacements.get_data(), 0, comm); auto comp_data = gather_into_array.get_data(); if (my_rank == 0) { auto ref_array = gko::Array(this->ref, {2, 3, 1, 2, 1, 0, 1, 1, -4, 5}); - this->assert_equal_arrays(gather_into_array, ref_array); + GKO_ASSERT_ARRAY_EQ(gather_into_array, ref_array); } else { ASSERT_EQ(comp_data, nullptr); } - delete data; } @@ -633,9 +567,9 @@ TYPED_TEST(MpiBindings, AllToAllWorksCorrectly) ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(send_array.get_data(), 1, - recv_array.get_data(), 1, comm); - this->assert_equal_arrays(recv_array, ref_array); + gko::mpi::all_to_all(send_array.get_data(), 1, recv_array.get_data(), 1, + comm); + GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -661,8 +595,8 @@ TYPED_TEST(MpiBindings, AllToAllInPlaceWorksCorrectly) ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(recv_array.get_data(), 1, comm); - this->assert_equal_arrays(recv_array, ref_array); + gko::mpi::all_to_all(recv_array.get_data(), 1, comm); + GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -712,11 +646,11 @@ TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) ref_array = gko::Array{this->ref, {0, 2, 3, 3}}; } - gko::mpi::all_to_all_v( - send_array.get_data(), scounts_array.get_data(), - soffset_array.get_data(), recv_array.get_data(), - rcounts_array.get_data(), roffset_array.get_data(), {}, comm); - this->assert_equal_arrays(recv_array, ref_array); + gko::mpi::all_to_all_v(send_array.get_data(), scounts_array.get_data(), + soffset_array.get_data(), recv_array.get_data(), + rcounts_array.get_data(), roffset_array.get_data(), + {}, comm); + GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -735,9 +669,9 @@ TYPED_TEST(MpiBindings, CanScanValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm); - gko::mpi::scan(&data, &max, 1, MPI_MAX, comm); - gko::mpi::scan(&data, &min, 1, MPI_MIN, comm); + gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm); + gko::mpi::scan(&data, &max, 1, MPI_MAX, comm); + gko::mpi::scan(&data, &min, 1, MPI_MIN, comm); if (my_rank == 0) { EXPECT_EQ(sum, TypeParam{3}); EXPECT_EQ(max, TypeParam{3}); diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 697777cb827..9ae3678aa9d 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -76,96 +76,94 @@ namespace detail { template -constexpr MPI_Datatype mpi_type_impl() -{ - return MPI_C_BOOL; -} +struct mpi_type_impl { + constexpr static MPI_Datatype get_type() { return MPI_DATATYPE_NULL; } +}; template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_CHAR; } template <> -constexpr MPI_Datatype 
mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_UNSIGNED_CHAR; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_UNSIGNED; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_INT; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_UNSIGNED_SHORT; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_UNSIGNED_LONG; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_LONG; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_FLOAT; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_DOUBLE; } template <> -constexpr MPI_Datatype mpi_type_impl() +constexpr MPI_Datatype mpi_type_impl::get_type() { return MPI_LONG_DOUBLE; } template <> -constexpr MPI_Datatype mpi_type_impl>() +constexpr MPI_Datatype mpi_type_impl>::get_type() { return MPI_C_COMPLEX; } template <> -constexpr MPI_Datatype mpi_type_impl>() +constexpr MPI_Datatype mpi_type_impl>::get_type() { return MPI_C_DOUBLE_COMPLEX; } - template inline const T* in_place() { @@ -175,14 +173,6 @@ inline const T* in_place() } // namespace detail - -template -constexpr MPI_Datatype get_type() -{ - return detail::mpi_type_impl(); -} - - /* * Class that sets up and finalizes the MPI exactly once per program execution. * using the singleton pattern. This must be called before any of the MPI @@ -606,9 +596,9 @@ void send(const SendType* send_buffer, const int send_count, const int destination_rank, const int send_tag, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, - get_type(), destination_rank, - send_tag, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Send( + send_buffer, send_count, detail::mpi_type_impl::get_type(), + destination_rank, send_tag, comm->get())); } @@ -630,9 +620,9 @@ MPI_Request i_send(const SendType* send_buffer, const int send_count, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend(send_buffer, send_count, - get_type(), destination_rank, - send_tag, comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend( + send_buffer, send_count, detail::mpi_type_impl::get_type(), + destination_rank, send_tag, comm->get(), &req)); return req; } @@ -652,8 +642,9 @@ void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, std::shared_ptr status = {}) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv( - recv_buffer, recv_count, get_type(), source_rank, recv_tag, - comm->get(), status ? status->get() : MPI_STATUS_IGNORE)); + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + source_rank, recv_tag, comm->get(), + status ? 
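
The switch from a function template to a class template here is deliberate: an unspecialized lookup now falls back to the primary template's MPI_DATATYPE_NULL instead of silently mapping every unknown type to MPI_C_BOOL, and a class template leaves room for partial specializations later. A reduced sketch of the pattern (two sample types only; the real header covers the full set):

    #include <mpi.h>

    template <typename T>
    struct mpi_type_impl {
        // Unknown types map to an invalid datatype and fail loudly inside MPI.
        constexpr static MPI_Datatype get_type() { return MPI_DATATYPE_NULL; }
    };

    template <>
    constexpr MPI_Datatype mpi_type_impl<int>::get_type()
    {
        return MPI_INT;
    }

    template <>
    constexpr MPI_Datatype mpi_type_impl<double>::get_type()
    {
        return MPI_DOUBLE;
    }

    // Usage at a call site:
    // MPI_Send(buf, n, mpi_type_impl<double>::get_type(), dest, tag, comm);
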
status->get() : MPI_STATUS_IGNORE)); } @@ -675,9 +666,9 @@ MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, - get_type(), source_rank, - recv_tag, comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv( + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + source_rank, recv_tag, comm->get(), &req)); return req; } @@ -697,9 +688,10 @@ void put(const PutType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Put(origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Put( + origin_buffer, origin_count, detail::mpi_type_impl::get_type(), + target_rank, target_disp, target_count, + detail::mpi_type_impl::get_type(), window.get())); } @@ -722,8 +714,9 @@ MPI_Request r_put(const PutType* origin_buffer, const int origin_count, { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( - origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window.get(), &req)); + origin_buffer, origin_count, detail::mpi_type_impl::get_type(), + target_rank, target_disp, target_count, + detail::mpi_type_impl::get_type(), window.get(), &req)); return req; } @@ -743,9 +736,10 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Get(origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Get( + origin_buffer, origin_count, detail::mpi_type_impl::get_type(), + target_rank, target_disp, target_count, + detail::mpi_type_impl::get_type(), window.get())); } @@ -767,9 +761,10 @@ MPI_Request r_get(GetType* origin_buffer, const int origin_count, const int target_count, window& window) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Rget(origin_buffer, origin_count, get_type(), target_rank, - target_disp, target_count, get_type(), window, &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( + origin_buffer, origin_count, detail::mpi_type_impl::get_type(), + target_rank, target_disp, target_count, + detail::mpi_type_impl::get_type(), window, &req)); return req; } @@ -786,8 +781,9 @@ template void broadcast(BroadcastType* buffer, int count, int root_rank, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, get_type(), - root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast( + buffer, count, detail::mpi_type_impl::get_type(), + root_rank, comm->get())); } @@ -805,9 +801,10 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, int root_rank, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, - get_type(), operation, - root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Reduce(send_buffer, recv_buffer, count, + detail::mpi_type_impl::get_type(), operation, + root_rank, comm->get())); } @@ -828,9 +825,10 @@ MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(send_buffer, recv_buffer, count, - get_type(), operation, - root_rank, comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Ireduce(send_buffer, recv_buffer, 
count, + detail::mpi_type_impl::get_type(), operation, + root_rank, comm->get(), &req)); return req; } @@ -848,9 +846,9 @@ template void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Allreduce(detail::in_place(), recv_buffer, count, - get_type(), operation, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( + detail::in_place(), recv_buffer, count, + detail::mpi_type_impl::get_type(), operation, comm->get())); } @@ -872,7 +870,8 @@ MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Iallreduce(detail::in_place(), recv_buffer, count, - get_type(), operation, comm->get(), &req)); + detail::mpi_type_impl::get_type(), operation, + comm->get(), &req)); return req; } @@ -892,9 +891,9 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, - get_type(), operation, - comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( + send_buffer, recv_buffer, count, + detail::mpi_type_impl::get_type(), operation, comm->get())); } @@ -916,9 +915,10 @@ MPI_Request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, - get_type(), operation, - comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Iallreduce(send_buffer, recv_buffer, count, + detail::mpi_type_impl::get_type(), operation, + comm->get(), &req)); return req; } @@ -938,9 +938,10 @@ void gather(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Gather(send_buffer, send_count, get_type(), recv_buffer, - recv_count, get_type(), root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather( + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + root_rank, comm->get())); } @@ -963,8 +964,9 @@ void gather_v(const SendType* send_buffer, const int send_count, std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( - send_buffer, send_count, get_type(), recv_buffer, recv_counts, - displacements, get_type(), root_rank, comm->get())); + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_counts, displacements, + detail::mpi_type_impl::get_type(), root_rank, comm->get())); } @@ -983,8 +985,9 @@ void all_gather(const SendType* send_buffer, const int send_count, std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( - send_buffer, send_count, get_type(), recv_buffer, recv_count, - get_type(), comm->get())); + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + comm->get())); } @@ -1002,9 +1005,10 @@ void scatter(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Scatter(send_buffer, send_count, get_type(), recv_buffer, - recv_count, get_type(), root_rank, comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + root_rank, comm->get())); } @@ -1025,8 +1029,9 @@ void scatter_v(const SendType* 
send_buffer, const int* send_counts, std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( - send_buffer, send_counts, displacements, get_type(), - recv_buffer, recv_count, get_type(), root_rank, comm->get())); + send_buffer, send_counts, displacements, + detail::mpi_type_impl::get_type(), recv_buffer, recv_count, + detail::mpi_type_impl::get_type(), root_rank, comm->get())); } @@ -1046,8 +1051,9 @@ void all_to_all(RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( - detail::in_place(), recv_count, get_type(), - recv_buffer, recv_count, get_type(), comm->get())); + detail::in_place(), recv_count, + detail::mpi_type_impl::get_type(), recv_buffer, recv_count, + detail::mpi_type_impl::get_type(), comm->get())); } @@ -1070,8 +1076,9 @@ MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - detail::in_place(), recv_count, get_type(), - recv_buffer, recv_count, get_type(), comm->get(), &req)); + detail::in_place(), recv_count, + detail::mpi_type_impl::get_type(), recv_buffer, recv_count, + detail::mpi_type_impl::get_type(), comm->get(), &req)); return req; } @@ -1091,9 +1098,10 @@ void all_to_all(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Alltoall(send_buffer, send_count, get_type(), recv_buffer, - recv_count, get_type(), comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + comm->get())); } @@ -1116,8 +1124,9 @@ MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - send_buffer, send_count, get_type(), recv_buffer, recv_count, - get_type(), comm->get(), &req)); + send_buffer, send_count, detail::mpi_type_impl::get_type(), + recv_buffer, recv_count, detail::mpi_type_impl::get_type(), + comm->get(), &req)); return req; } @@ -1141,10 +1150,11 @@ void all_to_all_v(const SendType* send_buffer, const int* send_counts, const int* recv_counts, const int* recv_offsets, const int stride, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Alltoallv(send_buffer, send_counts, send_offsets, - get_type(), recv_buffer, recv_counts, - recv_offsets, get_type(), comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( + send_buffer, send_counts, send_offsets, + detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, + recv_offsets, detail::mpi_type_impl::get_type(), + comm->get())); } @@ -1171,10 +1181,11 @@ MPI_Request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Ialltoallv(send_buffer, send_counts, send_offsets, - get_type(), recv_buffer, recv_counts, - recv_offsets, get_type(), comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( + send_buffer, send_counts, send_offsets, + detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, + recv_offsets, detail::mpi_type_impl::get_type(), comm->get(), + &req)); return req; } @@ -1194,9 +1205,9 @@ template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, MPI_Op operation, std::shared_ptr comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, - get_type(), operation, - comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan( + send_buffer, recv_buffer, 
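
The in-place overloads route detail::in_place<ReduceType>() into the send-buffer argument; its body is outside these hunks, but it presumably wraps MPI's MPI_IN_PLACE sentinel, which tells the collective to treat the receive buffer as both input and output. The underlying idiom in raw MPI:

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        int value = rank + 1;
        // The receive buffer doubles as the contribution on every rank.
        MPI_Allreduce(MPI_IN_PLACE, &value, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        std::printf("rank %d: %d\n", rank, value);  // size * (size + 1) / 2
        MPI_Finalize();
    }
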
count, + detail::mpi_type_impl::get_type(), operation, comm->get())); } @@ -1218,9 +1229,10 @@ MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, std::shared_ptr comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, - get_type(), operation, - comm->get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Iscan(send_buffer, recv_buffer, count, + detail::mpi_type_impl::get_type(), operation, + comm->get(), &req)); return req; } From fa1d9102a448577f2975462c9bc0c5bf68a85d17 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 17 Nov 2021 15:48:32 +0100 Subject: [PATCH 32/59] Remove info struct and use MPI_Info --- include/ginkgo/core/base/mpi.hpp | 100 ++++++------------------------- 1 file changed, 17 insertions(+), 83 deletions(-) diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 9ae3678aa9d..9ee5009bce6 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -215,76 +215,6 @@ class init_finalize { }; -/** - * A class holding and operating on the MPI_Info class. Stores the key value - * pair as a map and provides methods to access these values with keys as - * strings. - */ -class info { -public: - info() : info_(MPI_INFO_NULL) {} - - explicit info(MPI_Info input_info) - { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_dup(input_info, &this->info_)); - } - - void create_default() - { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_create(&this->info_)); - } - - void remove(std::string key) - { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_delete(this->info_, key.c_str())); - } - - std::string& at(std::string& key) { return this->key_value_.at(key); } - - void add(std::string key, std::string value) - { - this->key_value_[key] = value; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Info_set(this->info_, key.c_str(), value.c_str())); - } - - MPI_Info get() { return this->info_; } - - ~info() - { - if (this->info_ != MPI_INFO_NULL) { - MPI_Info_free(&this->info_); - } - } - -private: - std::map key_value_; - MPI_Info info_; -}; - - -/** - * A status class that allows creation of MPI_Status and - * frees the status array when it goes out of scope - */ -class status : public EnableSharedCreateMethod { -public: - status(const int size) : status_(new MPI_Status[size]) {} - - status() : status_(new MPI_Status[1]) {} - - ~status() - { - if (status_) delete[] status_; - } - - MPI_Status* get() const { return status_; } - -private: - MPI_Status* status_; -}; - - /** * A communicator class that takes in the given communicator and duplicates it * for our purposes. 
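
With the wrapper class removed, callers needing window hints pass a raw MPI_Info handle, with MPI_INFO_NULL as the new default. A sketch of the bare API the class used to encapsulate; "no_locks" is just one example of a predefined window hint:

    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        MPI_Info info;
        MPI_Info_create(&info);
        MPI_Info_set(info, "no_locks", "true");  // keys and values are strings
        int window_data[4] = {};
        MPI_Win win;
        MPI_Win_create(window_data, sizeof(window_data), sizeof(int), info,
                       MPI_COMM_WORLD, &win);
        MPI_Info_free(&info);  // the window keeps its own copy of the hints
        MPI_Win_free(&win);
        MPI_Finalize();
    }
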
As the class or object goes out of scope, the communicator @@ -494,20 +424,21 @@ class window { window(ValueType* base, unsigned int size, std::shared_ptr comm, - const int disp_unit = sizeof(ValueType), info input_info = info(), + const int disp_unit = sizeof(ValueType), + MPI_Info input_info = MPI_INFO_NULL, win_type create_type = win_type::create) { if (create_type == win_type::create) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Win_create(base, size, disp_unit, input_info.get(), - comm->get(), &this->window_)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create(base, size, disp_unit, + input_info, comm->get(), + &this->window_)); } else if (create_type == win_type::dynamic_create) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create_dynamic( - input_info.get(), comm->get(), &this->window_)); + input_info, comm->get(), &this->window_)); } else if (create_type == win_type::allocate) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Win_allocate(size, disp_unit, input_info.get(), comm->get(), - base, &this->window_)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate(size, disp_unit, + input_info, comm->get(), + base, &this->window_)); } else { GKO_NOT_IMPLEMENTED; } @@ -635,16 +566,19 @@ MPI_Request i_send(const SendType* send_buffer, const int send_count, * @param source_rank the rank to send the data to * @param recv_tag the tag for the send call * @param comm the communicator + * + * @return the status of completion of this call */ template -void recv(RecvType* recv_buffer, const int recv_count, const int source_rank, - const int recv_tag, std::shared_ptr comm, - std::shared_ptr status = {}) +MPI_Status recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag, + std::shared_ptr comm) { + MPI_Status status; GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv( recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - source_rank, recv_tag, comm->get(), - status ? 
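
Returning MPI_Status by value instead of threading a shared_ptr<status> through the call also makes the result queryable in the usual way. A sketch of what a caller can do with the status in raw MPI (at least two ranks assumed):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        if (rank == 0) {
            int payload[3] = {7, 8, 9};
            MPI_Send(payload, 3, MPI_INT, 1, 0, MPI_COMM_WORLD);
        } else if (rank == 1) {
            int buffer[8];
            MPI_Status status;
            MPI_Recv(buffer, 8, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &status);
            int count;
            MPI_Get_count(&status, MPI_INT, &count);
            // The status identifies what actually arrived: source, tag, length.
            std::printf("got %d ints from rank %d (tag %d)\n", count,
                        status.MPI_SOURCE, status.MPI_TAG);
        }
        MPI_Finalize();
    }
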
status->get() : MPI_STATUS_IGNORE)); + source_rank, recv_tag, comm->get(), &status)); + return status; } From b377becdc3e61c035444c690ee58480fb4eb64fe Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 17 Nov 2021 16:15:51 +0100 Subject: [PATCH 33/59] Make MPI calls inline --- core/test/mpi/base/bindings.cpp | 2 +- include/ginkgo/core/base/mpi.hpp | 173 ++++++++++++++++--------------- 2 files changed, 89 insertions(+), 86 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index adb2a74e2a8..770c0a104d6 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -649,7 +649,7 @@ TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) gko::mpi::all_to_all_v(send_array.get_data(), scounts_array.get_data(), soffset_array.get_data(), recv_array.get_data(), rcounts_array.get_data(), roffset_array.get_data(), - {}, comm); + comm); GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 9ee5009bce6..13dedb6ec53 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -523,9 +523,9 @@ class window { * @param comm the communicator */ template -void send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - std::shared_ptr comm) +inline void send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Send( send_buffer, send_count, detail::mpi_type_impl::get_type(), @@ -546,9 +546,9 @@ void send(const SendType* send_buffer, const int send_count, * @return the request handle for the send call */ template -MPI_Request i_send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - std::shared_ptr comm) +inline MPI_Request i_send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend( @@ -570,9 +570,9 @@ MPI_Request i_send(const SendType* send_buffer, const int send_count, * @return the status of completion of this call */ template -MPI_Status recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag, - std::shared_ptr comm) +inline MPI_Status recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag, + std::shared_ptr comm) { MPI_Status status; GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv( @@ -595,9 +595,9 @@ MPI_Status recv(RecvType* recv_buffer, const int recv_count, * @return the request handle for the send call */ template -MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag, - std::shared_ptr comm) +inline MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv( @@ -618,9 +618,9 @@ MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, * @param window the window to put the data into */ template -void put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) +inline void put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Put( 
origin_buffer, origin_count, detail::mpi_type_impl::get_type(), @@ -642,9 +642,9 @@ void put(const PutType* origin_buffer, const int origin_count, * @return the request handle for the send call */ template -MPI_Request r_put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) +inline MPI_Request r_put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( @@ -666,9 +666,9 @@ MPI_Request r_put(const PutType* origin_buffer, const int origin_count, * @param window the window to put the data into */ template -void get(GetType* origin_buffer, const int origin_count, const int target_rank, - const unsigned int target_disp, const int target_count, - window& window) +inline void get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Get( origin_buffer, origin_count, detail::mpi_type_impl::get_type(), @@ -690,9 +690,9 @@ void get(GetType* origin_buffer, const int origin_count, const int target_rank, * @return the request handle for the send call */ template -MPI_Request r_get(GetType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) +inline MPI_Request r_get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count, window& window) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( @@ -712,8 +712,8 @@ MPI_Request r_get(GetType* origin_buffer, const int origin_count, * @param comm the communicator */ template -void broadcast(BroadcastType* buffer, int count, int root_rank, - std::shared_ptr comm) +inline void broadcast(BroadcastType* buffer, int count, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast( buffer, count, detail::mpi_type_impl::get_type(), @@ -731,9 +731,9 @@ void broadcast(BroadcastType* buffer, int count, int root_rank, * @param comm the communicator */ template -void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - MPI_Op operation, int root_rank, - std::shared_ptr comm) +inline void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS( MPI_Reduce(send_buffer, recv_buffer, count, @@ -754,9 +754,10 @@ void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, * @return the request handle for the call */ template -MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, int root_rank, - std::shared_ptr comm) +inline MPI_Request i_reduce(const ReduceType* send_buffer, + ReduceType* recv_buffer, int count, + MPI_Op operation, int root_rank, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( @@ -777,8 +778,8 @@ MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, * @param comm the communicator */ template -void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, - std::shared_ptr comm) +inline void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( detail::in_place(), recv_buffer, count, @@ -798,8 
+799,9 @@ void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, * @return the request handle for the call */ template -MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, - std::shared_ptr comm) +inline MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, + MPI_Op operation, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( @@ -821,9 +823,9 @@ MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, * @param comm the communicator */ template -void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm) +inline void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( send_buffer, recv_buffer, count, @@ -844,9 +846,10 @@ void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, * @return the request handle for the call */ template -MPI_Request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm) +inline MPI_Request i_all_reduce(const ReduceType* send_buffer, + ReduceType* recv_buffer, int count, + MPI_Op operation, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( @@ -868,9 +871,9 @@ MPI_Request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, * @param comm the communicator */ template -void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) +inline void gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather( send_buffer, send_count, detail::mpi_type_impl::get_type(), @@ -892,10 +895,10 @@ void gather(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void gather_v(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank, - std::shared_ptr comm) +inline void gather_v(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int* recv_counts, + const int* displacements, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( send_buffer, send_count, detail::mpi_type_impl::get_type(), @@ -914,9 +917,9 @@ void gather_v(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void all_gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) +inline void all_gather(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( send_buffer, send_count, detail::mpi_type_impl::get_type(), @@ -935,9 +938,9 @@ void all_gather(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void scatter(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) +inline void scatter(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( send_buffer, send_count, detail::mpi_type_impl::get_type(), 
@@ -957,10 +960,10 @@ void scatter(const SendType* send_buffer, const int send_count, * @param comm the communicator */ template -void scatter_v(const SendType* send_buffer, const int* send_counts, - const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank, - std::shared_ptr comm) +inline void scatter_v(const SendType* send_buffer, const int* send_counts, + const int* displacements, RecvType* recv_buffer, + const int recv_count, int root_rank, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( send_buffer, send_counts, displacements, @@ -981,8 +984,8 @@ void scatter_v(const SendType* send_buffer, const int* send_counts, * are the same. */ template -void all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) +inline void all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( detail::in_place(), recv_count, @@ -1005,8 +1008,8 @@ void all_to_all(RecvType* recv_buffer, const int recv_count, * are the same. */ template -MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) +inline MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( @@ -1028,9 +1031,9 @@ MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, * @param comm the communicator */ template -void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) +inline void all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( send_buffer, send_count, detail::mpi_type_impl::get_type(), @@ -1052,9 +1055,10 @@ void all_to_all(const SendType* send_buffer, const int send_count, * @return the request handle for the call */ template -MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) +inline MPI_Request i_all_to_all(const SendType* send_buffer, + const int send_count, RecvType* recv_buffer, + const int recv_count, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( @@ -1075,14 +1079,13 @@ MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, * @param recv_buffer the buffer to gather into * @param recv_count the number of elements to receive * @param recv_offsets the offsets for the recv buffer - * @param stride the stride to be used in case of sending concatenated data * @param comm the communicator */ template -void all_to_all_v(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const int stride, std::shared_ptr comm) +inline void all_to_all_v(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets, + std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( send_buffer, send_counts, send_offsets, @@ -1102,17 +1105,17 @@ void all_to_all_v(const SendType* send_buffer, const int* send_counts, * @param recv_buffer the buffer to gather into * @param recv_count the number of elements to receive * @param recv_offsets the offsets for the recv buffer - * @param stride the stride to be used in case of sending concatenated data * 
@param comm the communicator * * @return the request handle for the call */ template -MPI_Request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const int stride, - std::shared_ptr comm) +inline MPI_Request i_all_to_all_v(const SendType* send_buffer, + const int* send_counts, + const int* send_offsets, + RecvType* recv_buffer, const int* recv_counts, + const int* recv_offsets, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( @@ -1136,8 +1139,8 @@ MPI_Request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, * @param req the request handle */ template -void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - MPI_Op operation, std::shared_ptr comm) +inline void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, + MPI_Op operation, std::shared_ptr comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan( send_buffer, recv_buffer, count, @@ -1158,9 +1161,9 @@ void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, * @return the request handle for the call */ template -MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm) +inline MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, + int count, MPI_Op operation, + std::shared_ptr comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( From 0b9a5105a1a3bae815522ec2687fa103e42d3ef6 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 18 Nov 2021 11:12:47 +0100 Subject: [PATCH 34/59] Overhaul communicator --- core/test/mpi/base/bindings.cpp | 122 ++++++------- core/test/mpi/base/communicator.cpp | 119 ++++++------- include/ginkgo/core/base/mpi.hpp | 265 +++++++++++++++------------- 3 files changed, 254 insertions(+), 252 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 770c0a104d6..3f0857748b8 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -68,7 +68,7 @@ TYPED_TEST(MpiBindings, CanSetADefaultwindow) TYPED_TEST(MpiBindings, CanCreatewindow) { auto data = std::vector{1, 2, 3, 4}; - auto comm = gko::mpi::communicator::create_world(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); auto win = gko::mpi::window(data.data(), 4 * sizeof(TypeParam), comm); ASSERT_NE(win.get(), MPI_WIN_NULL); @@ -79,9 +79,9 @@ TYPED_TEST(MpiBindings, CanCreatewindow) TYPED_TEST(MpiBindings, CanSendAndRecvValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto recv_array = gko::Array{this->ref}; if (my_rank == 0) { auto send_array = std::vector{1, 2, 3, 4}; @@ -103,9 +103,9 @@ TYPED_TEST(MpiBindings, CanSendAndRecvValues) TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector send_array; auto recv_array = gko::Array{this->ref}; TypeParam* data; @@ -137,9 +137,9 @@ TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) { using window = gko::mpi::window; - auto comm = 
gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -166,9 +166,9 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) { using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -195,9 +195,9 @@ TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) TYPED_TEST(MpiBindings, CanPutValuesWithFence) { using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -223,9 +223,9 @@ TYPED_TEST(MpiBindings, CanPutValuesWithFence) TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) { using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -252,9 +252,9 @@ TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) { using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -281,9 +281,9 @@ TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) TYPED_TEST(MpiBindings, CanGetValuesWithFence) { using window = gko::mpi::window; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); std::vector data; if (my_rank == 0) { data = std::vector{1, 2, 3, 4}; @@ -308,9 +308,9 @@ TYPED_TEST(MpiBindings, CanGetValuesWithFence) TYPED_TEST(MpiBindings, CanBroadcastValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto array = gko::Array{this->ref, 8}; if (my_rank == 0) { array = gko::Array(this->ref, {2, 3, 1, 3, -1, 0, 3, 1}); @@ -331,9 +331,9 @@ TYPED_TEST(MpiBindings, CanBroadcastValues) TYPED_TEST(MpiBindings, CanReduceValues) { using TypeParam = TypeParam; - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); TypeParam data, sum, max, min; if (my_rank == 0) { data = 3; @@ -357,9 +357,9 @@ TYPED_TEST(MpiBindings, CanReduceValues) 
TYPED_TEST(MpiBindings, CanAllReduceValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); TypeParam data, sum; if (my_rank == 0) { data = 3; @@ -377,9 +377,9 @@ TYPED_TEST(MpiBindings, CanAllReduceValues) TYPED_TEST(MpiBindings, CanAllReduceValuesInPlace) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); TypeParam data; if (my_rank == 0) { data = 3; @@ -397,9 +397,9 @@ TYPED_TEST(MpiBindings, CanAllReduceValuesInPlace) TYPED_TEST(MpiBindings, CanScatterValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto scatter_from_array = gko::Array{this->ref}; if (my_rank == 0) { scatter_from_array = @@ -428,9 +428,9 @@ TYPED_TEST(MpiBindings, CanScatterValues) TYPED_TEST(MpiBindings, CanGatherValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); TypeParam data; if (my_rank == 0) { data = 3; @@ -455,9 +455,9 @@ TYPED_TEST(MpiBindings, CanGatherValues) TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto scatter_from_array = gko::Array{this->ref}; auto scatter_into_array = gko::Array{this->ref}; auto s_counts = gko::Array{this->ref->get_master(), @@ -504,9 +504,9 @@ TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) TYPED_TEST(MpiBindings, CanGatherValuesWithDisplacements) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto gather_from_array = gko::Array{this->ref}; auto gather_into_array = gko::Array{this->ref}; auto r_counts = @@ -546,9 +546,9 @@ TYPED_TEST(MpiBindings, CanGatherValuesWithDisplacements) TYPED_TEST(MpiBindings, AllToAllWorksCorrectly) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto send_array = gko::Array{this->ref}; auto recv_array = gko::Array{this->ref}; auto ref_array = gko::Array{this->ref}; @@ -575,9 +575,9 @@ TYPED_TEST(MpiBindings, AllToAllWorksCorrectly) TYPED_TEST(MpiBindings, AllToAllInPlaceWorksCorrectly) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto recv_array = gko::Array{this->ref}; auto ref_array = gko::Array{this->ref}; recv_array = gko::Array{this->ref, 4}; @@ -602,9 +602,9 @@ TYPED_TEST(MpiBindings, 
AllToAllInPlaceWorksCorrectly) TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); auto send_array = gko::Array{this->ref}; auto recv_array = gko::Array{this->ref}; auto ref_array = gko::Array{this->ref}; @@ -656,9 +656,9 @@ TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) TYPED_TEST(MpiBindings, CanScanValues) { - auto comm = gko::mpi::communicator::create_world(); - auto my_rank = comm->rank(); - auto num_ranks = comm->size(); + auto comm = gko::mpi::communicator(MPI_COMM_WORLD); + auto my_rank = comm.rank(); + auto num_ranks = comm.size(); TypeParam data, sum, max, min; if (my_rank == 0) { data = 3; diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp index b6ce9cabb8a..399f97db741 100644 --- a/core/test/mpi/base/communicator.cpp +++ b/core/test/mpi/base/communicator.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include namespace { @@ -59,125 +58,111 @@ class Communicator : public ::testing::Test { }; -TEST_F(Communicator, DefaultCommIsInvalid) -{ - auto comm = gko::mpi::communicator(); - - EXPECT_EQ(comm.get(), MPI_COMM_NULL); -} - - -TEST_F(Communicator, CanCreateWorld) +TEST_F(Communicator, CommKnowsItsSize) { - auto comm = gko::mpi::communicator::create_world(); + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); - EXPECT_EQ(comm->compare(MPI_COMM_WORLD), true); + EXPECT_EQ(comm.size(), size); } -TEST_F(Communicator, KnowsItsCommunicator) +TEST_F(Communicator, CommKnowsItsRank) { - MPI_Comm dup; - MPI_Comm_dup(MPI_COMM_WORLD, &dup); - auto comm_world = gko::mpi::communicator(dup); + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); - EXPECT_EQ(comm_world.compare(dup), true); + EXPECT_EQ(comm.rank(), rank); } -TEST_F(Communicator, CommunicatorCanBeCopied) +TEST_F(Communicator, CommKnowsItsLocalRank) { - auto copy = comm; + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); - EXPECT_EQ(comm.compare(MPI_COMM_WORLD), true); - EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); + // Expect local rank to be same as rank when on one node + EXPECT_EQ(comm.node_local_rank(), rank); } TEST_F(Communicator, CommunicatorCanBeCopyConstructed) { - auto copy = gko::mpi::communicator(comm); + int rank = 5; + MPI_Comm_rank(comm.get(), &rank); + gko::mpi::communicator copy(comm); - EXPECT_EQ(comm.compare(MPI_COMM_WORLD), true); - EXPECT_EQ(copy.compare(MPI_COMM_WORLD), true); + EXPECT_EQ(copy == comm, true); } -TEST_F(Communicator, CommunicatorCanBeMoved) +TEST_F(Communicator, CommunicatorCanBeCopyAssigned) { - int size; - auto comm_world = gko::mpi::communicator::create_world(); - auto moved = std::move(comm_world); + gko::mpi::communicator copy = comm; - MPI_Comm_size(MPI_COMM_WORLD, &size); - EXPECT_EQ(comm_world, nullptr); - EXPECT_EQ(moved->compare(MPI_COMM_WORLD), true); - EXPECT_EQ(moved->size(), size); + EXPECT_EQ(copy == comm, true); } -TEST_F(Communicator, CommunicatorCanBeMoveConstructed) +TEST_F(Communicator, NonOwnedCommunicatorFailsToMove) { - int size; - auto comm_world = gko::mpi::communicator::create_world(); - auto moved = gko::mpi::communicator(std::move(*comm_world.get())); - - MPI_Comm_size(MPI_COMM_WORLD, &size); - EXPECT_EQ(comm_world->get(), MPI_COMM_NULL); - EXPECT_EQ(comm_world->size(), 0); - EXPECT_EQ(moved.compare(MPI_COMM_WORLD), true); - 
EXPECT_EQ(moved.size(), size); + ASSERT_THROW({ auto moved = std::move(comm); }, gko::NotSupported); } -TEST_F(Communicator, CommKnowsItsSize) +TEST_F(Communicator, NonOwnedCommunicatorFailsToMoveConstruct) { - int size; - MPI_Comm_size(MPI_COMM_WORLD, &size); - - EXPECT_EQ(comm.size(), size); + ASSERT_THROW({ auto moved = gko::mpi::communicator(std::move(comm)); }, + gko::NotSupported); } -TEST_F(Communicator, CommKnowsItsRank) -{ - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - EXPECT_EQ(comm.rank(), rank); -} - - -TEST_F(Communicator, CommKnowsItsLocalRank) +TEST_F(Communicator, CanSetCustomCommunicator) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + auto world_rank = comm.rank(); + auto world_size = comm.size(); + auto color = world_rank / 4; - // Expect local rank to be same as rank when on one node - EXPECT_EQ(comm.local_rank(), rank); + auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); + for (auto i = 0; i < world_size; ++i) { + EXPECT_LT(row_comm.rank(), 4); + } } -TEST_F(Communicator, KnowsItsRanks) +TEST_F(Communicator, CanMoveAssignCustomCommunicator) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + auto world_rank = comm.rank(); + auto world_size = comm.size(); + auto color = world_rank / 4; - EXPECT_EQ(comm.rank(), rank); + auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); + auto mv_row_comm = std::move(row_comm); + for (auto i = 0; i < world_size; ++i) { + EXPECT_LT(mv_row_comm.rank(), 4); + } } -TEST_F(Communicator, CanSetCustomCommunicator) +TEST_F(Communicator, CanMoveConstructCustomCommunicator) { auto world_rank = comm.rank(); auto world_size = comm.size(); auto color = world_rank / 4; auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); + gko::mpi::communicator mv_row_comm(std::move(row_comm)); for (auto i = 0; i < world_size; ++i) { - EXPECT_LT(row_comm.rank(), 4); + EXPECT_LT(mv_row_comm.rank(), 4); } } +TEST_F(Communicator, CanDuplicateCommunicator) +{ + auto comm2 = gko::mpi::communicator::duplicate(MPI_COMM_WORLD); + ASSERT_TRUE(comm2 == comm); +} + + } // namespace diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 13dedb6ec53..937a8bb4345 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #if GKO_HAVE_MPI @@ -215,126 +216,144 @@ class init_finalize { }; +namespace { + + +class comm_deleter { +public: + using pointer = MPI_Comm*; + void operator()(pointer comm) const + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(comm)); + } +}; + + +} // namespace + + /** * A communicator class that takes in the given communicator and duplicates it * for our purposes. As the class or object goes out of scope, the communicator * is freed. 
*/ -class communicator : public EnableSharedCreateMethod { +class communicator { public: - communicator(const MPI_Comm& comm) + communicator(MPI_Comm comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm, &this->comm_)); + this->comm_ = + comm_manager(new MPI_Comm(comm), null_deleter{}); this->size_ = get_num_ranks(); this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); + this->node_local_rank_ = get_node_local_rank(); } communicator(const MPI_Comm& comm, int color, int key) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Comm_split(comm, color, key, &this->comm_)); + MPI_Comm comm_out; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm, color, key, &comm_out)); + this->comm_ = comm_manager(new MPI_Comm(comm_out), comm_deleter{}); this->size_ = get_num_ranks(); this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); - } - - communicator() - { - this->comm_ = MPI_COMM_NULL; - this->size_ = 0; - this->rank_ = -1; + this->node_local_rank_ = get_node_local_rank(); } communicator(communicator& other) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); + MPI_Comm comm; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm)); + this->comm_ = comm_manager(new MPI_Comm(comm), comm_deleter{}); + this->size_ = other.size_; + this->rank_ = other.rank_; + this->node_local_rank_ = other.node_local_rank_; } communicator& operator=(const communicator& other) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); + MPI_Comm comm; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm)); + this->comm_ = comm_manager(new MPI_Comm(comm), comm_deleter{}); + this->size_ = other.size_; + this->rank_ = other.rank_; + this->node_local_rank_ = other.node_local_rank_; return *this; } communicator(communicator&& other) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); - other.comm_ = MPI_COMM_NULL; - other.size_ = 0; - other.rank_ = -1; + if (other.is_owning()) { + this->comm_ = std::move(other.comm_); + this->size_ = other.size_; + this->rank_ = other.rank_; + this->node_local_rank_ = other.node_local_rank_; + other.size_ = 0; + other.rank_ = -1; + } else { + // If we don't own the communicator, then we can't move from it. + GKO_NOT_SUPPORTED(other); + } } communicator& operator=(communicator&& other) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.comm_, &this->comm_)); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->local_rank_ = get_local_rank(); - other.size_ = 0; - other.rank_ = -1; + if (other.is_owning()) { + this->comm_ = std::move(other.comm_); + this->size_ = other.size_; + this->rank_ = other.rank_; + this->node_local_rank_ = other.node_local_rank_; + other.size_ = 0; + other.rank_ = -1; + } else { + // If we don't own the communicator, then we can't move from it. 
+ GKO_NOT_SUPPORTED(other); + } return *this; } - static MPI_Comm get_comm_world() { return MPI_COMM_WORLD; } - - static std::shared_ptr create_world() + static communicator duplicate(const MPI_Comm& comm_in) { - return std::make_shared(get_comm_world()); + MPI_Comm comm; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm_in, &comm)); + communicator comm_out(comm); + return comm_out; } - MPI_Comm get() const { return comm_; } + const MPI_Comm& get() const { return *(this->comm_.get()); } int size() const { return size_; } int rank() const { return rank_; }; - int local_rank() const { return local_rank_; }; - - bool compare(const MPI_Comm& other) const - { - int flag; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(this->comm_, other, &flag)); - return flag; - } + int node_local_rank() const { return node_local_rank_; }; bool operator==(const communicator& rhs) { return compare(rhs.get()); } - ~communicator() + bool is_owning() { - if (this->comm_ && this->comm_ != MPI_COMM_NULL) { - MPI_Comm_free(&this->comm_); - } + return comm_.get_deleter().target_type() == typeid(comm_deleter); } private: - MPI_Comm comm_; + using comm_manager = + std::unique_ptr>; + comm_manager comm_; int size_{}; int rank_{}; - int local_rank_{}; + int node_local_rank_{}; int get_my_rank() { int my_rank = 0; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(comm_, &my_rank)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(get(), &my_rank)); return my_rank; } - int get_local_rank() + int get_node_local_rank() { MPI_Comm local_comm; int rank; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type( - comm_, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm)); + get(), MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm)); GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &rank)); MPI_Comm_free(&local_comm); return rank; @@ -343,9 +362,16 @@ class communicator : public EnableSharedCreateMethod { int get_num_ranks() { int size = 1; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(comm_, &size)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(get(), &size)); return size; } + + bool compare(const MPI_Comm& other) const + { + int flag; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(get(), other, &flag)); + return flag; + } }; @@ -363,9 +389,9 @@ inline double get_walltime() { return MPI_Wtime(); } * * @param comm the communicator */ -inline void synchronize(const std::shared_ptr& comm) +inline void synchronize(const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm->get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm.get())); } @@ -422,23 +448,20 @@ class window { window_ = std::exchange(other.window_, MPI_WIN_NULL); } - window(ValueType* base, unsigned int size, - std::shared_ptr comm, + window(ValueType* base, unsigned int size, const communicator& comm, const int disp_unit = sizeof(ValueType), MPI_Info input_info = MPI_INFO_NULL, win_type create_type = win_type::create) { if (create_type == win_type::create) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create(base, size, disp_unit, - input_info, comm->get(), - &this->window_)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create( + base, size, disp_unit, input_info, comm.get(), &this->window_)); } else if (create_type == win_type::dynamic_create) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create_dynamic( - input_info, comm->get(), &this->window_)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Win_create_dynamic(input_info, comm.get(), &this->window_)); } else if (create_type == win_type::allocate) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate(size, disp_unit, - input_info, comm->get(), - base, &this->window_)); + 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate( + size, disp_unit, input_info, comm.get(), base, &this->window_)); } else { GKO_NOT_IMPLEMENTED; } @@ -525,11 +548,11 @@ class window { template inline void send(const SendType* send_buffer, const int send_count, const int destination_rank, const int send_tag, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Send( send_buffer, send_count, detail::mpi_type_impl::get_type(), - destination_rank, send_tag, comm->get())); + destination_rank, send_tag, comm.get())); } @@ -548,12 +571,12 @@ inline void send(const SendType* send_buffer, const int send_count, template inline MPI_Request i_send(const SendType* send_buffer, const int send_count, const int destination_rank, const int send_tag, - std::shared_ptr comm) + const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend( send_buffer, send_count, detail::mpi_type_impl::get_type(), - destination_rank, send_tag, comm->get(), &req)); + destination_rank, send_tag, comm.get(), &req)); return req; } @@ -572,12 +595,12 @@ inline MPI_Request i_send(const SendType* send_buffer, const int send_count, template inline MPI_Status recv(RecvType* recv_buffer, const int recv_count, const int source_rank, const int recv_tag, - std::shared_ptr comm) + const communicator& comm) { MPI_Status status; GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv( recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - source_rank, recv_tag, comm->get(), &status)); + source_rank, recv_tag, comm.get(), &status)); return status; } @@ -597,12 +620,12 @@ inline MPI_Status recv(RecvType* recv_buffer, const int recv_count, template inline MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, const int source_rank, const int recv_tag, - std::shared_ptr comm) + const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv( recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - source_rank, recv_tag, comm->get(), &req)); + source_rank, recv_tag, comm.get(), &req)); return req; } @@ -713,11 +736,11 @@ inline MPI_Request r_get(GetType* origin_buffer, const int origin_count, */ template inline void broadcast(BroadcastType* buffer, int count, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast( buffer, count, detail::mpi_type_impl::get_type(), - root_rank, comm->get())); + root_rank, comm.get())); } @@ -733,12 +756,12 @@ inline void broadcast(BroadcastType* buffer, int count, int root_rank, template inline void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS( MPI_Reduce(send_buffer, recv_buffer, count, detail::mpi_type_impl::get_type(), operation, - root_rank, comm->get())); + root_rank, comm.get())); } @@ -757,13 +780,13 @@ template inline MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, int root_rank, - std::shared_ptr comm) + const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Ireduce(send_buffer, recv_buffer, count, detail::mpi_type_impl::get_type(), operation, - root_rank, comm->get(), &req)); + root_rank, comm.get(), &req)); return req; } @@ -779,11 +802,11 @@ inline MPI_Request i_reduce(const ReduceType* send_buffer, */ template inline void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( 
detail::in_place(), recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm->get())); + detail::mpi_type_impl::get_type(), operation, comm.get())); } @@ -800,14 +823,13 @@ inline void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, */ template inline MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, - MPI_Op operation, - std::shared_ptr comm) + MPI_Op operation, const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Iallreduce(detail::in_place(), recv_buffer, count, detail::mpi_type_impl::get_type(), operation, - comm->get(), &req)); + comm.get(), &req)); return req; } @@ -824,12 +846,11 @@ inline MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, */ template inline void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm) + int count, MPI_Op operation, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm->get())); + detail::mpi_type_impl::get_type(), operation, comm.get())); } @@ -848,14 +869,13 @@ inline void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, template inline MPI_Request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, - MPI_Op operation, - std::shared_ptr comm) + MPI_Op operation, const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Iallreduce(send_buffer, recv_buffer, count, detail::mpi_type_impl::get_type(), operation, - comm->get(), &req)); + comm.get(), &req)); return req; } @@ -873,12 +893,12 @@ inline MPI_Request i_all_reduce(const ReduceType* send_buffer, template inline void gather(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - root_rank, comm->get())); + root_rank, comm.get())); } @@ -898,12 +918,12 @@ template inline void gather_v(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int* recv_counts, const int* displacements, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, displacements, - detail::mpi_type_impl::get_type(), root_rank, comm->get())); + detail::mpi_type_impl::get_type(), root_rank, comm.get())); } @@ -919,12 +939,12 @@ inline void gather_v(const SendType* send_buffer, const int send_count, template inline void all_gather(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm->get())); + comm.get())); } @@ -940,12 +960,12 @@ inline void all_gather(const SendType* send_buffer, const int send_count, template inline void scatter(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, detail::mpi_type_impl::get_type(), 
- root_rank, comm->get())); + root_rank, comm.get())); } @@ -963,12 +983,12 @@ template inline void scatter_v(const SendType* send_buffer, const int* send_counts, const int* displacements, RecvType* recv_buffer, const int recv_count, int root_rank, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( send_buffer, send_counts, displacements, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), root_rank, comm->get())); + detail::mpi_type_impl::get_type(), root_rank, comm.get())); } @@ -985,12 +1005,12 @@ inline void scatter_v(const SendType* send_buffer, const int* send_counts, */ template inline void all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( detail::in_place(), recv_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), comm->get())); + detail::mpi_type_impl::get_type(), comm.get())); } @@ -1009,13 +1029,13 @@ inline void all_to_all(RecvType* recv_buffer, const int recv_count, */ template inline MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) + const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( detail::in_place(), recv_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), comm->get(), &req)); + detail::mpi_type_impl::get_type(), comm.get(), &req)); return req; } @@ -1033,12 +1053,12 @@ inline MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, template inline void all_to_all(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int recv_count, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm->get())); + comm.get())); } @@ -1057,14 +1077,13 @@ inline void all_to_all(const SendType* send_buffer, const int send_count, template inline MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, - const int recv_count, - std::shared_ptr comm) + const int recv_count, const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( send_buffer, send_count, detail::mpi_type_impl::get_type(), recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm->get(), &req)); + comm.get(), &req)); return req; } @@ -1085,13 +1104,12 @@ template inline void all_to_all_v(const SendType* send_buffer, const int* send_counts, const int* send_offsets, RecvType* recv_buffer, const int* recv_counts, const int* recv_offsets, - std::shared_ptr comm) + const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( send_buffer, send_counts, send_offsets, detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, - recv_offsets, detail::mpi_type_impl::get_type(), - comm->get())); + recv_offsets, detail::mpi_type_impl::get_type(), comm.get())); } @@ -1115,13 +1133,13 @@ inline MPI_Request i_all_to_all_v(const SendType* send_buffer, const int* send_offsets, RecvType* recv_buffer, const int* recv_counts, const int* recv_offsets, - std::shared_ptr comm) + const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( send_buffer, send_counts, send_offsets, detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, - recv_offsets, 
detail::mpi_type_impl::get_type(), comm->get(), + recv_offsets, detail::mpi_type_impl::get_type(), comm.get(), &req)); return req; } @@ -1140,11 +1158,11 @@ inline MPI_Request i_all_to_all_v(const SendType* send_buffer, */ template inline void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - MPI_Op operation, std::shared_ptr comm) + MPI_Op operation, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan( send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm->get())); + detail::mpi_type_impl::get_type(), operation, comm.get())); } @@ -1162,14 +1180,13 @@ inline void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, */ template inline MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, - int count, MPI_Op operation, - std::shared_ptr comm) + int count, MPI_Op operation, const communicator& comm) { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Iscan(send_buffer, recv_buffer, count, detail::mpi_type_impl::get_type(), operation, - comm->get(), &req)); + comm.get(), &req)); return req; } From 7d11a47a0f8dcc77d2212c3b513ab499fd730b3d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 19 Nov 2021 14:49:51 +0100 Subject: [PATCH 35/59] Fix MPI_Win, MPI_Put consistency issues --- core/test/mpi/base/bindings.cpp | 13 +++++++------ include/ginkgo/core/base/mpi.hpp | 4 +++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 3f0857748b8..5c38a18cc3a 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -147,16 +147,18 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) data = std::vector{0, 0, 0, 0}; } auto win = window(data.data(), 4 * sizeof(TypeParam), comm); - win.lock_all(); if (my_rank == 0) { + win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { gko::mpi::put(data.data(), 4, rank, 0, 4, win); - win.flush(rank); } + win.flush_local(0); + win.flush(rank); } + win.unlock_all(); } - win.unlock_all(); + win.fence(); auto ref = std::vector{1, 2, 3, 4}; ASSERT_EQ(data, ref); @@ -181,11 +183,12 @@ TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) if (rank != my_rank) { win.lock(rank, 0, window::lock_type::exclusive); gko::mpi::put(data.data(), 4, rank, 0, 4, win); - win.flush(rank); + win.flush(0); win.unlock(rank); } } } + win.fence(); auto ref = std::vector{1, 2, 3, 4}; ASSERT_EQ(data, ref); @@ -238,7 +241,6 @@ TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { gko::mpi::get(data.data(), 4, 0, 0, 4, win); - win.flush(0); } } win.unlock_all(); @@ -267,7 +269,6 @@ TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) if (rank != my_rank) { win.lock(0, 0, window::lock_type::exclusive); gko::mpi::get(data.data(), 4, 0, 0, 4, win); - win.flush(0); win.unlock(0); } } diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 937a8bb4345..de72094a4d4 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -417,7 +417,7 @@ inline MPI_Status wait(MPI_Request& req) */ inline std::vector wait_all(std::vector& req) { - std::vector status; + std::vector status(req.size()); GKO_ASSERT_NO_MPI_ERRORS( MPI_Waitall(req.size(), req.data(), status.data())); return status; @@ -524,6 +524,8 @@ class window { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(this->window_)); } + void sync() { 
GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_sync(this->window_)); } + ~window() { if (this->window_ && this->window_ != MPI_WIN_NULL) { From df080b6317ab552bfcd2aa203d5bed2c83366e38 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 20 Nov 2021 20:07:09 +0100 Subject: [PATCH 36/59] Fix GPU Aware functionality --- CMakeLists.txt | 25 ++---- cmake/autodetect_executors.cmake | 2 +- core/test/mpi/cuda-aware-mpi-test.cu | 82 ------------------- include/ginkgo/config.hpp.in | 6 ++ include/ginkgo/core/base/mpi.hpp | 13 +++ .../ginkgo/core/base/polymorphic_object.hpp | 20 ----- 6 files changed, 25 insertions(+), 123 deletions(-) delete mode 100644 core/test/mpi/cuda-aware-mpi-test.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fa791192c8..bae049c42ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) option(GINKGO_BUILD_MPI "Compile the MPI module" ${GINKGO_HAS_MPI}) +option(GINKGO_FORCE_GPU_AWARE_MPI "Force the GPU Aware functionality to be enabled" OFF) option(GINKGO_BUILD_DPCPP "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) @@ -191,29 +192,13 @@ else() endif() set(GINKGO_HAVE_MPI 0) -set(GINKGO_HAVE_CUDA_AWARE_MPI 0) +set(GINKGO_HAVE_GPU_AWARE_MPI 0) if(GINKGO_BUILD_MPI) find_package(MPI REQUIRED) set(GINKGO_HAVE_MPI 1) - if(GINKGO_BUILD_CUDA AND (${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "") ) - enable_language(CUDA) - try_run(GKO_CUDA_AWARE_RUN_STATUS GKO_CUDA_AWARE_COMPILE_STATUS - "${CMAKE_BINARY_DIR}" SOURCES "${CMAKE_SOURCE_DIR}/mpi/test/cuda-aware-mpi-test.cu" - COMPILE_DEFINITIONS "" - LINK_LIBRARIES MPI::MPI_C - RUN_OUTPUT_VARIABLE GKO_CUDA_AWARE_RUN_OUT - COMPILE_OUTPUT_VARIABLE GKO_CUDA_AWARE_COMPILE_OUT) - if( ${GKO_CUDA_AWARE_RUN_STATUS} MATCHES "FAILED_TO_RUN" ) - message(STATUS "MPI does not support CUDA, disabling CUDA-Aware features, everything will be staged through the host.") - else() - message(STATUS "MPI supports CUDA, enabling CUDA-Aware features") - set(GINKGO_HAVE_CUDA_AWARE_MPI 1) - endif() - endif() - if( ${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "YES" ) - set(GINKGO_HAVE_CUDA_AWARE_MPI 1) - elseif( ${GINKGO_FORCE_CUDA_AWARE_MPI} MATCHES "NO" ) - set(GINKGO_HAVE_CUDA_AWARE_MPI 0) + set(GINKGO_HAVE_GPU_AWARE_MPI 0) + if(GINKGO_FORCE_GPU_AWARE_MPI) + set(GINKGO_HAVE_GPU_AWARE_MPI 1) endif() endif() diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 431b53cbd36..f867609765b 100644 --- a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -20,7 +20,7 @@ endif() if(MPI_FOUND) if(NOT DEFINED GINKGO_BUILD_MPI) - message(STATUS "Enabling MPI executor") + message(STATUS "Enabling MPI support") endif() set(GINKGO_HAS_MPI ON) endif() diff --git a/core/test/mpi/cuda-aware-mpi-test.cu b/core/test/mpi/cuda-aware-mpi-test.cu deleted file mode 100644 index f19cbafc079..00000000000 --- a/core/test/mpi/cuda-aware-mpi-test.cu +++ /dev/null @@ -1,82 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. 
Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include -#include - - -#include - - -int main(int argc, char* argv[]) -{ - int num_cuda_devices = 0; - cudaGetDeviceCount(&num_cuda_devices); - if (num_cuda_devices < 1) std::exit(-1); - MPI_Init(&argc, &argv); - int rank = 0; - int size = 0; - MPI_Comm_size(MPI_COMM_WORLD, &size); - assert(size > 1); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - cudaSetDevice(rank); - int* d_buf; - int* buf; - unsigned long len = 10; - buf = (int*)malloc(sizeof(int) * len); - for (int i = 0; i < len; ++i) { - buf[i] = (i + 1) * (rank + 1); - } - cudaMalloc(&d_buf, sizeof(int) * len); - cudaMemcpy(d_buf, buf, sizeof(int) * len, cudaMemcpyHostToDevice); - if (rank == 0) { - MPI_Send(d_buf, len, MPI_INT, 1, 12, MPI_COMM_WORLD); - } else { - MPI_Status status; - MPI_Recv(d_buf, len, MPI_INT, 0, 12, MPI_COMM_WORLD, &status); - for (int i = 0; i < len; ++i) { - bool flag = (buf[i] == (i + 1) * 2); - if (!flag) std::exit(-1); - } - cudaMemcpy(buf, d_buf, sizeof(int) * len, cudaMemcpyDeviceToHost); - for (int i = 0; i < len; ++i) { - bool flag = (buf[i] == (i + 1)); - if (!flag) std::exit(-1); - } - } - cudaFree(d_buf); - free(buf); - MPI_Finalize(); - return 0; -} diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 8436d8c3abb..061914f8f57 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -88,6 +88,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // clang-format on +/* Is GPU-aware MPI available ? */ +// clang-format off +#define GKO_HAVE_GPU_AWARE_MPI @GINKGO_HAVE_GPU_AWARE_MPI@ +// clang-format on + + /* Is HWLOC available ? 
*/ // clang-format off #define GKO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@ diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index de72094a4d4..2ce24948d08 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -216,6 +216,19 @@ class init_finalize { }; +/** + * Returns if GPU aware functionality has been enabled + */ +static bool is_gpu_aware() +{ +#if GKO_HAVE_GPU_AWARE_MPI + return true; +#else + return false; +#endif +} + + namespace { diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index 95b4ce5bcc9..71741479211 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -649,26 +649,6 @@ class EnableCreateMethod { }; -/** - * This mixin implements a static `create()` method on `ConcreteType` that - * dynamically allocates the memory, uses the passed-in arguments to construct - * the object, and returns an std::unique_ptr to such an object. - * - * @tparam ConcreteObject the concrete type for which `create()` is being - * implemented [CRTP parameter] - */ -template -class EnableSharedCreateMethod { -public: - template - static std::shared_ptr create(Args&&... args) - { - return std::shared_ptr( - new ConcreteType(std::forward(args)...)); - } -}; - - } // namespace gko From b1c264da86990b73923a7b8f916a99fb2d97b472 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 21 Nov 2021 11:36:59 +0100 Subject: [PATCH 37/59] Update docs and fix types --- core/test/mpi/base/bindings.cpp | 16 +- include/ginkgo/core/base/mpi.hpp | 563 +++++++++++++++++++------------ 2 files changed, 360 insertions(+), 219 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 5c38a18cc3a..d95aa692295 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -128,6 +128,10 @@ TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) auto stat1 = gko::mpi::wait_all(req1); } else { auto stat2 = gko::mpi::wait(req2); + int count; + MPI_Get_count(&stat2, gko::mpi::type_impl::get_type(), + &count); + ASSERT_EQ(count, 4); auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; GKO_ASSERT_ARRAY_EQ(ref_array, recv_array); } @@ -146,7 +150,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); if (my_rank == 0) { win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { @@ -177,7 +181,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { @@ -207,7 +211,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithFence) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); win.fence(); if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { @@ -235,7 +239,7 @@ TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); if (my_rank != 0) { win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { @@ -263,7 +267,7 @@ TYPED_TEST(MpiBindings, 
CanGetValuesWithExclusiveLock) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { @@ -291,7 +295,7 @@ TYPED_TEST(MpiBindings, CanGetValuesWithFence) } else { data = std::vector{0, 0, 0, 0}; } - auto win = window(data.data(), 4 * sizeof(TypeParam), comm); + auto win = window(data.data(), 4, comm); win.fence(); if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 2ce24948d08..1cbc7f94da1 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -61,109 +61,33 @@ namespace gko { namespace mpi { -/* - * This enum specifies the threading type to be used when creating an MPI - * environment. - */ -enum class thread_type { - serialized = MPI_THREAD_SERIALIZED, - funneled = MPI_THREAD_FUNNELED, - single = MPI_THREAD_SINGLE, - multiple = MPI_THREAD_MULTIPLE -}; - - -namespace detail { +#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ + template <> \ + constexpr MPI_Datatype type_impl::get_type() \ + { \ + return mpi_type; \ + } template -struct mpi_type_impl { +struct type_impl { constexpr static MPI_Datatype get_type() { return MPI_DATATYPE_NULL; } }; -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_CHAR; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_UNSIGNED_CHAR; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_UNSIGNED; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_INT; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_UNSIGNED_SHORT; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_UNSIGNED_LONG; -} - +GKO_REGISTER_MPI_TYPE(char, MPI_CHAR); +GKO_REGISTER_MPI_TYPE(unsigned char, MPI_UNSIGNED_CHAR); +GKO_REGISTER_MPI_TYPE(unsigned, MPI_UNSIGNED); +GKO_REGISTER_MPI_TYPE(int, MPI_INT); +GKO_REGISTER_MPI_TYPE(unsigned short, MPI_UNSIGNED_SHORT); +GKO_REGISTER_MPI_TYPE(unsigned long, MPI_UNSIGNED_LONG); +GKO_REGISTER_MPI_TYPE(long, MPI_LONG); +GKO_REGISTER_MPI_TYPE(float, MPI_FLOAT); +GKO_REGISTER_MPI_TYPE(double, MPI_DOUBLE); +GKO_REGISTER_MPI_TYPE(long double, MPI_LONG_DOUBLE); +GKO_REGISTER_MPI_TYPE(std::complex, MPI_C_COMPLEX); +GKO_REGISTER_MPI_TYPE(std::complex, MPI_C_DOUBLE_COMPLEX); -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_LONG; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_FLOAT; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_DOUBLE; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl::get_type() -{ - return MPI_LONG_DOUBLE; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl>::get_type() -{ - return MPI_C_COMPLEX; -} - - -template <> -constexpr MPI_Datatype mpi_type_impl>::get_type() -{ - return MPI_C_DOUBLE_COMPLEX; -} template inline const T* in_place() @@ -172,14 +96,28 @@ inline const T* in_place() } -} // namespace detail +/** + * This enum specifies the threading type to be used when creating an MPI + * environment. 
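
// Illustrative sketch, not part of the patch: requesting a fully
// multithreaded MPI environment through the RAII wrapper defined below.
//
//     int main(int argc, char* argv[])
//     {
//         gko::mpi::environment env(argc, argv,
//                                   gko::mpi::thread_type::multiple);
//         // ... use MPI through gko::mpi ...
//     }   // MPI_Finalize runs automatically when env leaves scope
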
+ */ +enum class thread_type { + serialized = MPI_THREAD_SERIALIZED, + funneled = MPI_THREAD_FUNNELED, + single = MPI_THREAD_SINGLE, + multiple = MPI_THREAD_MULTIPLE +}; + -/* - * Class that sets up and finalizes the MPI exactly once per program execution. - * using the singleton pattern. This must be called before any of the MPI - * functions. +/** + * Class that sets up and finalizes the MPI environment. This class is a simple + * RAII wrapper to MPI_Init and MPI_Finalize. + * + * MPI_Init must have been called before calling any MPI functions. + * + * @note If MPI_Init has already been called, then this class should not be + * used. */ -class init_finalize { +class environment { public: static bool is_finalized() { @@ -195,8 +133,23 @@ class init_finalize { return flag; } - init_finalize(int& argc, char**& argv, - const thread_type thread_t = thread_type::serialized) + /** + * Return the provided thread support. + * + * @return the provided thread support + */ + int get_provided_thread_support() { return provided_thread_support_; } + + /** + * Call MPI_Init_thread and initialize the MPI environment + * + * @param argc the number of arguments to the main function. + * @param argv the arguments provided to the main function. + * @param thread_t the type of threading for initialization. See + * @thread_type + */ + environment(int& argc, char**& argv, + const thread_type thread_t = thread_type::serialized) { this->required_thread_support_ = static_cast(thread_t); GKO_ASSERT_NO_MPI_ERRORS( @@ -204,11 +157,12 @@ class init_finalize { &(this->provided_thread_support_))); } - init_finalize() = delete; + /** + * Call MPI_Finalize at the end of the scope of this class. + */ + ~environment() { MPI_Finalize(); } - ~init_finalize() { MPI_Finalize(); } - - int get_provided_thread_support() { return provided_thread_support_; } + environment() = delete; private: int required_thread_support_; @@ -232,6 +186,9 @@ static bool is_gpu_aware() namespace { +/** + * A deleter class that calls MPI_Comm_free on the owning MPI_Comm object + */ class comm_deleter { public: using pointer = MPI_Comm*; @@ -252,7 +209,15 @@ class comm_deleter { */ class communicator { public: - communicator(MPI_Comm comm) + /** + * Non-owning constructor for an existing communicator of type MPI_Comm. The + * MPI_Comm object will not be deleted after the communicator object has + * been freed and an explicit MPI_Comm_free needs to be called on the + * original MPI_Comm_free object. + * + * @param comm The input MPI_Comm object. + */ + communicator(const MPI_Comm& comm) { this->comm_ = comm_manager(new MPI_Comm(comm), null_deleter{}); @@ -261,6 +226,14 @@ class communicator { this->node_local_rank_ = get_node_local_rank(); } + /** + * Create a communicator object from an existing MPI_Comm object using color + * and key. + * + * @param comm The input MPI_Comm object. + * @param color The color to split the original comm object + * @param key The key to split the comm object + */ communicator(const MPI_Comm& comm, int color, int key) { MPI_Comm comm_out; @@ -271,6 +244,30 @@ class communicator { this->node_local_rank_ = get_node_local_rank(); } + /** + * Create a communicator object from an existing MPI_Comm object using color + * and key. + * + * @param comm The input communicator object. 
+ * @param color The color to split the original comm object + * @param key The key to split the comm object + */ + communicator(const communicator& comm, int color, int key) + { + MPI_Comm comm_out; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Comm_split(comm.get(), color, key, &comm_out)); + this->comm_ = comm_manager(new MPI_Comm(comm_out), comm_deleter{}); + this->size_ = get_num_ranks(); + this->rank_ = get_my_rank(); + this->node_local_rank_ = get_node_local_rank(); + } + + /** + * Copy constructor. The underlying MPI_Comm object will be duplicated. + * + * @param other the object to be copied + */ communicator(communicator& other) { MPI_Comm comm; @@ -281,6 +278,12 @@ class communicator { this->node_local_rank_ = other.node_local_rank_; } + /** + * Copy assignment operator. The underlying MPI_Comm object will be + * duplicated. + * + * @param other the object to be copied + */ communicator& operator=(const communicator& other) { MPI_Comm comm; @@ -292,6 +295,12 @@ class communicator { return *this; } + /** + * Move constructor. If we own the underlying communicator, then we move the + * object over. If we don't, then we throw. + * + * @param other the object to be moved + */ communicator(communicator&& other) { if (other.is_owning()) { @@ -307,6 +316,12 @@ class communicator { } } + /** + * Move assignment operator. If we own the underlying communicator, then we + * move the object over. If we don't, then we throw. + * + * @param other the object to be moved + */ communicator& operator=(communicator&& other) { if (other.is_owning()) { @@ -323,6 +338,12 @@ class communicator { return *this; } + /** + * Duplicate and create an owning communicator from an input MPI_Comm + * object. + * + * @param comm_in the input MPI_Comm object to be duplicated + */ static communicator duplicate(const MPI_Comm& comm_in) { MPI_Comm comm; @@ -331,16 +352,46 @@ class communicator { return comm_out; } + /** + * Return the underlying MPI_Comm object. + * + * @return the MPI_Comm object + */ const MPI_Comm& get() const { return *(this->comm_.get()); } + /** + * Return the size of the communicator (number of ranks). + * + * @return the size + */ int size() const { return size_; } + /** + * Return the rank of the calling process in the communicator. + * + * @return the rank + */ int rank() const { return rank_; }; + /** + * Return the node local rank of the calling process in the communicator. + * + * @return the node local rank + */ int node_local_rank() const { return node_local_rank_; }; + /** + * Compare two communicator objects. + * + * @return if the two comm objects are equal + */ bool operator==(const communicator& rhs) { return compare(rhs.get()); } + /** + * Check if the underlying comm object is owned + * + * @return if the underlying comm object is owned + */ bool is_owning() { return comm_.get_deleter().target_type() == typeid(comm_deleter); @@ -398,7 +449,7 @@ inline double get_walltime() { return MPI_Wtime(); } /** * This function is used to synchronize between the ranks of a given - * communicator. + * communicator. Calls MPI_Barrier * * @param comm the communicator */ @@ -423,9 +474,9 @@ inline MPI_Status wait(MPI_Request& req) /** - * Allows a rank to wait on a particular request handle. + * Allows a rank to wait on multiple request handles. * - * @param req The request to wait on. + * @param req The request handles to wait on. * @param status The status variable that can be queried. 
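
// Illustrative sketch, not part of the patch: a non-blocking exchange
// between two ranks, waited on with wait_all. Buffer names, sizes, and
// the tag value are invented for the example.
//
//     std::vector<int> send_buf(4, comm.rank());
//     std::vector<int> recv_buf(4, 0);
//     auto other = comm.rank() == 0 ? 1 : 0;
//     std::vector<MPI_Request> reqs;
//     reqs.push_back(gko::mpi::i_send(send_buf.data(), 4, other, 0, comm));
//     reqs.push_back(gko::mpi::i_recv(recv_buf.data(), 4, other, 0, comm));
//     auto statuses = gko::mpi::wait_all(reqs);  // blocks until both finish
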
*/ inline std::vector wait_all(std::vector& req) @@ -448,31 +499,69 @@ inline std::vector wait_all(std::vector& req) template class window { public: - enum class win_type { allocate = 1, create = 2, dynamic_create = 3 }; + /** + * The create type for the window object. + */ + enum class create_type { allocate = 1, create = 2, dynamic_create = 3 }; + + /** + * The lock type for passive target synchronization of the windows. + */ enum class lock_type { shared = 1, exclusive = 2 }; + /** + * The default constructor. It creates a null window of MPI_WIN_NULL type. + */ window() : window_(MPI_WIN_NULL) {} + window(const window& other) = delete; + window& operator=(const window& other) = delete; + + /** + * The move constructor. Move the other object and replace it with + * MPI_WIN_NULL + * + * @param other the window object to be moved. + */ window(window&& other) : window_{std::exchange(other.window_, MPI_WIN_NULL)} {} + + /** + * The move assignment operator. Move the other object and replace it with + * MPI_WIN_NULL + * + * @param other the window object to be moved. + */ window& operator=(window&& other) { window_ = std::exchange(other.window_, MPI_WIN_NULL); } - window(ValueType* base, unsigned int size, const communicator& comm, + /** + * Create a window object with a given data pointer and type. A collective + * operation. + * + * @param base the base pointer for the window object. + * @param num_elems the num_elems of type ValueType the window points to. + * @param comm the communicator whose ranks will have windows created. + * @param disp_unit the displacement from base for the window object. + * @param input_info the MPI_Info object used to set certain properties. + * @param c_type the type of creation method to use to create the window. + */ + window(ValueType* base, int num_elems, const communicator& comm, const int disp_unit = sizeof(ValueType), MPI_Info input_info = MPI_INFO_NULL, - win_type create_type = win_type::create) + create_type c_type = create_type::create) { - if (create_type == win_type::create) { + unsigned size = num_elems * sizeof(ValueType); + if (c_type == create_type::create) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create( base, size, disp_unit, input_info, comm.get(), &this->window_)); - } else if (create_type == win_type::dynamic_create) { + } else if (c_type == create_type::dynamic_create) { GKO_ASSERT_NO_MPI_ERRORS( MPI_Win_create_dynamic(input_info, comm.get(), &this->window_)); - } else if (create_type == win_type::allocate) { + } else if (c_type == create_type::allocate) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate( size, disp_unit, input_info, comm.get(), base, &this->window_)); } else { @@ -480,8 +569,19 @@ class window { } } + /** + * Get the underlying window object of MPI_Win type. + * + * @return the underlying window object. + */ MPI_Win get() { return this->window_; } + /** + * The active target synchronization using MPI_Win_fence for the window + * object. This is called on all associated ranks. + * + * @param assert the optimization level. 0 is always valid. + */ void fence(int assert = 0) { if (&this->window_) { @@ -489,6 +589,13 @@ class window { } } + /** + * Create an epoch using MPI_Win_lock for the window + * object. + * + * @param rank the target rank. + * @param assert the optimization level. 0 is always valid. + */ void lock(int rank, int assert = 0, lock_type lock_t = lock_type::shared) { if (lock_t == lock_type::shared) { @@ -502,43 +609,85 @@ class window { } } + /** + * Close the epoch using MPI_Win_unlock for the window + * object. 
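
// Illustrative sketch, not part of the patch: an exclusive passive-target
// epoch around a one-sided put, mirroring the CanPutValuesWithExclusiveLock
// test; `data`, `target_rank`, and `comm` are invented for the example.
//
//     auto win = gko::mpi::window<int>(data.data(), 4, comm);
//     win.lock(target_rank, 0, gko::mpi::window<int>::lock_type::exclusive);
//     gko::mpi::put(data.data(), 4, target_rank, 0, 4, win);
//     win.unlock(target_rank);
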
+ * + * @param rank the target rank. + */ void unlock(int rank) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, this->window_)); } + /** + * Create the epoch on all ranks using MPI_Win_lock_all for the window + * object. + * + * @param assert the optimization level. 0 is always valid. + */ void lock_all(int assert = 0) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, this->window_)); } + /** + * Close the epoch on all ranks using MPI_Win_unlock_all for the window + * object. + */ void unlock_all() { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(this->window_)); } + /** + * Flush the existing RDMA operations on the target rank for the calling + * process for the window object. + * + * @param rank the target rank. + */ void flush(int rank) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, this->window_)); } + /** + * Flush the existing RDMA operations on the calling rank from the target + * rank for the window object. + * + * @param rank the target rank. + */ void flush_local(int rank) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, this->window_)); } + /** + * Flush all the existing RDMA operations for the calling + * process for the window object. + */ void flush_all() { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(this->window_)); } + /** + * Flush all the local existing RDMA operations on the calling rank for the + * window object. + */ void flush_all_local() { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(this->window_)); } + /** + * Synchronize the public and private buffers for the window object + */ void sync() { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_sync(this->window_)); } + /** + * The deleter which calls MPI_Win_free when the window leaves its scope. + */ ~window() { if (this->window_ && this->window_ != MPI_WIN_NULL) { @@ -565,9 +714,9 @@ inline void send(const SendType* send_buffer, const int send_count, const int destination_rank, const int send_tag, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Send( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - destination_rank, send_tag, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, + type_impl::get_type(), + destination_rank, send_tag, comm.get())); } @@ -589,9 +738,9 @@ inline MPI_Request i_send(const SendType* send_buffer, const int send_count, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Isend( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - destination_rank, send_tag, comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Isend(send_buffer, send_count, type_impl::get_type(), + destination_rank, send_tag, comm.get(), &req)); return req; } @@ -613,9 +762,9 @@ inline MPI_Status recv(RecvType* recv_buffer, const int recv_count, const communicator& comm) { MPI_Status status; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Recv( - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - source_rank, recv_tag, comm.get(), &status)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Recv(recv_buffer, recv_count, type_impl::get_type(), + source_rank, recv_tag, comm.get(), &status)); return status; } @@ -638,9 +787,9 @@ inline MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv( - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - source_rank, recv_tag, comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Irecv(recv_buffer, recv_count, type_impl::get_type(), + source_rank, recv_tag, comm.get(), &req)); return req; } @@ -660,10 +809,10 @@ inline void put(const 
PutType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Put( - origin_buffer, origin_count, detail::mpi_type_impl::get_type(), - target_rank, target_disp, target_count, - detail::mpi_type_impl::get_type(), window.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Put(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), window.get())); } @@ -685,10 +834,10 @@ inline MPI_Request r_put(const PutType* origin_buffer, const int origin_count, const int target_count, window& window) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( - origin_buffer, origin_count, detail::mpi_type_impl::get_type(), - target_rank, target_disp, target_count, - detail::mpi_type_impl::get_type(), window.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Rput(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), window.get(), &req)); return req; } @@ -708,10 +857,10 @@ inline void get(GetType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, const int target_count, window& window) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Get( - origin_buffer, origin_count, detail::mpi_type_impl::get_type(), - target_rank, target_disp, target_count, - detail::mpi_type_impl::get_type(), window.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Get(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), window.get())); } @@ -733,10 +882,10 @@ inline MPI_Request r_get(GetType* origin_buffer, const int origin_count, const int target_count, window& window) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( - origin_buffer, origin_count, detail::mpi_type_impl::get_type(), - target_rank, target_disp, target_count, - detail::mpi_type_impl::get_type(), window, &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Rget(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), window, &req)); return req; } @@ -753,9 +902,9 @@ template inline void broadcast(BroadcastType* buffer, int count, int root_rank, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast( - buffer, count, detail::mpi_type_impl::get_type(), - root_rank, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, + type_impl::get_type(), + root_rank, comm.get())); } @@ -773,10 +922,9 @@ inline void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, int root_rank, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Reduce(send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, - root_rank, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, root_rank, comm.get())); } @@ -798,10 +946,9 @@ inline MPI_Request i_reduce(const ReduceType* send_buffer, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Ireduce(send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, - root_rank, comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce( + send_buffer, recv_buffer, count, type_impl::get_type(), + operation, root_rank, comm.get(), &req)); return req; } @@ -820,8 +967,8 @@ inline void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, const communicator& comm) { 
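        // Editorial note, not part of the patch: this overload is the
        // in-place variant; in_place<ReduceType>() passes MPI_IN_PLACE as
        // the send buffer, so recv_buffer holds the local values on entry
        // and the reduced result on exit.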
GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( - detail::in_place(), recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm.get())); + in_place(), recv_buffer, count, + type_impl::get_type(), operation, comm.get())); } @@ -841,10 +988,9 @@ inline MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Iallreduce(detail::in_place(), recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, - comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( + in_place(), recv_buffer, count, + type_impl::get_type(), operation, comm.get(), &req)); return req; } @@ -863,9 +1009,9 @@ template inline void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, int count, MPI_Op operation, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( - send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, comm.get())); } @@ -887,10 +1033,9 @@ inline MPI_Request i_all_reduce(const ReduceType* send_buffer, MPI_Op operation, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Iallreduce(send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, - comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, comm.get(), &req)); return req; } @@ -911,9 +1056,8 @@ inline void gather(const SendType* send_buffer, const int send_count, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - root_rank, comm.get())); + send_buffer, send_count, type_impl::get_type(), recv_buffer, + recv_count, type_impl::get_type(), root_rank, comm.get())); } @@ -935,10 +1079,10 @@ inline void gather_v(const SendType* send_buffer, const int send_count, const int* displacements, int root_rank, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_counts, displacements, - detail::mpi_type_impl::get_type(), root_rank, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Gatherv(send_buffer, send_count, type_impl::get_type(), + recv_buffer, recv_counts, displacements, + type_impl::get_type(), root_rank, comm.get())); } @@ -957,9 +1101,8 @@ inline void all_gather(const SendType* send_buffer, const int send_count, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm.get())); + send_buffer, send_count, type_impl::get_type(), recv_buffer, + recv_count, type_impl::get_type(), comm.get())); } @@ -978,9 +1121,8 @@ inline void scatter(const SendType* send_buffer, const int send_count, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - root_rank, comm.get())); + send_buffer, send_count, type_impl::get_type(), recv_buffer, + recv_count, type_impl::get_type(), root_rank, comm.get())); } @@ -1000,10 +1142,10 @@ inline void scatter_v(const SendType* send_buffer, const int* send_counts, const int recv_count, int root_rank, const 
communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( - send_buffer, send_counts, displacements, - detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), root_rank, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Scatterv(send_buffer, send_counts, displacements, + type_impl::get_type(), recv_buffer, recv_count, + type_impl::get_type(), root_rank, comm.get())); } @@ -1023,9 +1165,8 @@ inline void all_to_all(RecvType* recv_buffer, const int recv_count, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( - detail::in_place(), recv_count, - detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), comm.get())); + in_place(), recv_count, type_impl::get_type(), + recv_buffer, recv_count, type_impl::get_type(), comm.get())); } @@ -1047,10 +1188,10 @@ inline MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, const communicator& comm) { MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - detail::in_place(), recv_count, - detail::mpi_type_impl::get_type(), recv_buffer, recv_count, - detail::mpi_type_impl::get_type(), comm.get(), &req)); + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Ialltoall(in_place(), recv_count, + type_impl::get_type(), recv_buffer, recv_count, + type_impl::get_type(), comm.get(), &req)); return req; } @@ -1071,9 +1212,8 @@ inline void all_to_all(const SendType* send_buffer, const int send_count, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm.get())); + send_buffer, send_count, type_impl::get_type(), recv_buffer, + recv_count, type_impl::get_type(), comm.get())); } @@ -1096,9 +1236,8 @@ inline MPI_Request i_all_to_all(const SendType* send_buffer, { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - send_buffer, send_count, detail::mpi_type_impl::get_type(), - recv_buffer, recv_count, detail::mpi_type_impl::get_type(), - comm.get(), &req)); + send_buffer, send_count, type_impl::get_type(), recv_buffer, + recv_count, type_impl::get_type(), comm.get(), &req)); return req; } @@ -1122,9 +1261,9 @@ inline void all_to_all_v(const SendType* send_buffer, const int* send_counts, const communicator& comm) { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( - send_buffer, send_counts, send_offsets, - detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, - recv_offsets, detail::mpi_type_impl::get_type(), comm.get())); + send_buffer, send_counts, send_offsets, type_impl::get_type(), + recv_buffer, recv_counts, recv_offsets, type_impl::get_type(), + comm.get())); } @@ -1152,10 +1291,9 @@ inline MPI_Request i_all_to_all_v(const SendType* send_buffer, { MPI_Request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( - send_buffer, send_counts, send_offsets, - detail::mpi_type_impl::get_type(), recv_buffer, recv_counts, - recv_offsets, detail::mpi_type_impl::get_type(), comm.get(), - &req)); + send_buffer, send_counts, send_offsets, type_impl::get_type(), + recv_buffer, recv_counts, recv_offsets, type_impl::get_type(), + comm.get(), &req)); return req; } @@ -1175,9 +1313,9 @@ template inline void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, MPI_Op operation, const communicator& comm) { - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan( - send_buffer, recv_buffer, count, - detail::mpi_type_impl::get_type(), operation, comm.get())); + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, + type_impl::get_type(), 
+                                      operation, comm.get()));
 }
 
 
 /**
@@ -1198,10 +1336,9 @@ inline MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer,
                           int count, MPI_Op operation, const communicator& comm)
 {
     MPI_Request req;
-    GKO_ASSERT_NO_MPI_ERRORS(
-        MPI_Iscan(send_buffer, recv_buffer, count,
-                  detail::mpi_type_impl<ScanType>::get_type(), operation,
-                  comm.get(), &req));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count,
+                                       type_impl<ScanType>::get_type(),
+                                       operation, comm.get(), &req));
     return req;
 }

From ced2a6121c8c53fc2f195bec92b214888be79746 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Mon, 22 Nov 2021 10:42:09 +0100
Subject: [PATCH 38/59] Make sync a comm member function

---
 core/test/mpi/base/communicator.cpp |  8 ++++++--
 include/ginkgo/core/base/mpi.hpp    | 18 ++++++------------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp
index 399f97db741..31844e85e1a 100644
--- a/core/test/mpi/base/communicator.cpp
+++ b/core/test/mpi/base/communicator.cpp
@@ -88,14 +88,18 @@ TEST_F(Communicator, CommKnowsItsLocalRank)
 
 TEST_F(Communicator, CommunicatorCanBeCopyConstructed)
 {
-    int rank = 5;
-    MPI_Comm_rank(comm.get(), &rank);
     gko::mpi::communicator copy(comm);
 
     EXPECT_EQ(copy == comm, true);
 }
 
 
+TEST_F(Communicator, CommunicatorCanBeSynchronized)
+{
+    ASSERT_NO_THROW(comm.synchronize());
+}
+
+
 TEST_F(Communicator, CommunicatorCanBeCopyAssigned)
 {
     gko::mpi::communicator copy = comm;
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index 1cbc7f94da1..43b2301abbc 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -397,6 +397,12 @@ class communicator {
         return comm_.get_deleter().target_type() == typeid(comm_deleter);
     }
 
+    /**
+     * This function is used to synchronize the ranks in the communicator.
+     * Calls MPI_Barrier
+     */
+    void synchronize() const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(get())); }
+
 private:
     using comm_manager =
         std::unique_ptr<MPI_Comm, std::function<void(MPI_Comm*)>>;
@@ -447,18 +453,6 @@ inline double get_walltime() { return MPI_Wtime(); }
 
 
-/**
- * This function is used to synchronize between the ranks of a given
- * communicator. Calls MPI_Barrier
- *
- * @param comm the communicator
- */
-inline void synchronize(const communicator& comm)
-{
-    GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(comm.get()));
-}
-
-
 /**
  * Allows a rank to wait on a particular request handle.
  *

From 4ecb5a105282570fd90b1530f929592d89912d45 Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Thu, 25 Nov 2021 11:42:35 +0100
Subject: [PATCH 39/59] Review update.

Co-authored-by: Aditya Kashi
Co-authored-by: Tobias Ribizel
---
 core/test/mpi/base/communicator.cpp |  3 ++-
 include/ginkgo/core/base/mpi.hpp    | 31 ++++++++++++++----------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp
index 31844e85e1a..0631c95258e 100644
--- a/core/test/mpi/base/communicator.cpp
+++ b/core/test/mpi/base/communicator.cpp
@@ -141,7 +141,8 @@ TEST_F(Communicator, CanMoveAssignCustomCommunicator)
     auto color = world_rank / 4;
 
     auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank);
-    auto mv_row_comm = std::move(row_comm);
+    gko::mpi::communicator mv_row_comm(MPI_COMM_WORLD);
+    mv_row_comm = std::move(row_comm);
     for (auto i = 0; i < world_size; ++i) {
         EXPECT_LT(mv_row_comm.rank(), 4);
     }
diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp
index 43b2301abbc..d205af9ba8c 100644
--- a/include/ginkgo/core/base/mpi.hpp
+++ b/include/ginkgo/core/base/mpi.hpp
@@ -34,12 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GKO_PUBLIC_CORE_BASE_MPI_HPP_
 
 
-#include 
 #include 
-#include 
 #include 
 #include 
-#include 
 
 
 #include <mpi.h>
@@ -61,18 +58,14 @@ namespace gko {
 namespace mpi {
 
 
-#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type)           \
-    template <>                                               \
-    constexpr MPI_Datatype type_impl<input_type>::get_type()  \
-    {                                                         \
-        return mpi_type;                                      \
+#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type)           \
+    template <>                                               \
+    struct type_impl<input_type> {                            \
+        static MPI_Datatype get_type() { return mpi_type; }   \
     }
 
 
-template <typename T>
-struct type_impl {
-    constexpr static MPI_Datatype get_type() { return MPI_DATATYPE_NULL; }
-};
+template <typename T>
+struct type_impl {};
 
 
 GKO_REGISTER_MPI_TYPE(char, MPI_CHAR);
@@ -162,7 +155,10 @@ class environment {
      */
     ~environment() { MPI_Finalize(); }
 
-    environment() = delete;
+    environment(const environment&) = delete;
+    environment(environment&&) = delete;
+    environment& operator=(const environment&) = delete;
+    environment& operator=(environment&&) = delete;
 
 private:
     int required_thread_support_;
@@ -195,6 +191,7 @@ class comm_deleter {
     void operator()(pointer comm) const
     {
         GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(comm));
+        delete comm;
     }
 };
@@ -268,7 +265,7 @@ class communicator {
      *
      * @param other the object to be copied
      */
-    communicator(communicator& other)
+    communicator(const communicator& other)
     {
         MPI_Comm comm;
         GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm));
@@ -286,6 +283,9 @@ class communicator {
      */
     communicator& operator=(const communicator& other)
     {
+        if (&other == this) {
+            return *this;
+        }
         MPI_Comm comm;
         GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm));
         this->comm_ = comm_manager(new MPI_Comm(comm), comm_deleter{});
@@ -324,6 +324,9 @@ class communicator {
      */
     communicator& operator=(communicator&& other)
    {
+        if (&other == this) {
+            return *this;
+        }
        if (other.is_owning()) {
            this->comm_ = std::move(other.comm_);
            this->size_ = other.size_;

From c2d96798782740153a7e66bda363acbed20653af Mon Sep 17 00:00:00 2001
From: Pratik Nayak
Date: Thu, 25 Nov 2021 17:54:14 +0100
Subject: [PATCH 40/59] Store MPI_Comm in a shared_ptr

---
 core/test/mpi/base/communicator.cpp |  59 ++++----------
 include/ginkgo/core/base/mpi.hpp    | 119 ++++------------------------
 2 files changed, 29 insertions(+), 149 deletions(-)

diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp
index 0631c95258e..19db40b9773 100644
--- a/core/test/mpi/base/communicator.cpp
+++ b/core/test/mpi/base/communicator.cpp
@@ -90,13 +90,7 @@
TEST_F(Communicator, CommunicatorCanBeCopyConstructed) { gko::mpi::communicator copy(comm); - EXPECT_EQ(copy == comm, true); -} - - -TEST_F(Communicator, CommunicatorCanBeSynchronized) -{ - ASSERT_NO_THROW(comm.synchronize()); + EXPECT_TRUE(copy == comm); } @@ -104,70 +98,45 @@ TEST_F(Communicator, CommunicatorCanBeCopyAssigned) { gko::mpi::communicator copy = comm; - EXPECT_EQ(copy == comm, true); + EXPECT_TRUE(copy == comm); } -TEST_F(Communicator, NonOwnedCommunicatorFailsToMove) +TEST_F(Communicator, CommunicatorCanBeMoveConstructed) { - ASSERT_THROW({ auto moved = std::move(comm); }, gko::NotSupported); -} - + gko::mpi::communicator comm2(MPI_COMM_WORLD); + gko::mpi::communicator copy(std::move(comm2)); -TEST_F(Communicator, NonOwnedCommunicatorFailsToMoveConstruct) -{ - ASSERT_THROW({ auto moved = gko::mpi::communicator(std::move(comm)); }, - gko::NotSupported); + EXPECT_TRUE(copy == comm); } -TEST_F(Communicator, CanSetCustomCommunicator) +TEST_F(Communicator, CommunicatorCanBeMoveAssigned) { - auto world_rank = comm.rank(); - auto world_size = comm.size(); - auto color = world_rank / 4; + gko::mpi::communicator comm2(MPI_COMM_WORLD); + gko::mpi::communicator copy = std::move(comm2); - auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); - for (auto i = 0; i < world_size; ++i) { - EXPECT_LT(row_comm.rank(), 4); - } + EXPECT_TRUE(copy == comm); } -TEST_F(Communicator, CanMoveAssignCustomCommunicator) +TEST_F(Communicator, CommunicatorCanBeSynchronized) { - auto world_rank = comm.rank(); - auto world_size = comm.size(); - auto color = world_rank / 4; - - auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); - gko::mpi::communicator mv_row_comm(MPI_COMM_WORLD); - mv_row_comm = std::move(row_comm); - for (auto i = 0; i < world_size; ++i) { - EXPECT_LT(mv_row_comm.rank(), 4); - } + ASSERT_NO_THROW(comm.synchronize()); } -TEST_F(Communicator, CanMoveConstructCustomCommunicator) +TEST_F(Communicator, CanSetCustomCommunicator) { auto world_rank = comm.rank(); auto world_size = comm.size(); auto color = world_rank / 4; auto row_comm = gko::mpi::communicator(comm.get(), color, world_rank); - gko::mpi::communicator mv_row_comm(std::move(row_comm)); for (auto i = 0; i < world_size; ++i) { - EXPECT_LT(mv_row_comm.rank(), 4); + EXPECT_LT(row_comm.rank(), 4); } } -TEST_F(Communicator, CanDuplicateCommunicator) -{ - auto comm2 = gko::mpi::communicator::duplicate(MPI_COMM_WORLD); - ASSERT_TRUE(comm2 == comm); -} - - } // namespace diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index d205af9ba8c..7c02ae36a71 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -183,13 +183,15 @@ namespace { /** - * A deleter class that calls MPI_Comm_free on the owning MPI_Comm object + * A deleter class that calls MPI_Comm_free on the owning MPI_Comm object and + * deletes the underlying comm ptr */ class comm_deleter { public: using pointer = MPI_Comm*; void operator()(pointer comm) const { + GKO_ASSERT(*comm != MPI_COMM_NULL); GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(comm)); delete comm; } @@ -210,14 +212,13 @@ class communicator { * Non-owning constructor for an existing communicator of type MPI_Comm. The * MPI_Comm object will not be deleted after the communicator object has * been freed and an explicit MPI_Comm_free needs to be called on the - * original MPI_Comm_free object. + * original MPI_Comm object. * * @param comm The input MPI_Comm object. 
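
// Illustrative sketch, not part of the patch: wrapping an existing
// communicator without taking ownership; MPI_COMM_WORLD itself is never
// freed by the wrapper.
//
//     gko::mpi::communicator world(MPI_COMM_WORLD);
//     auto r = world.rank();
//     auto n = world.size();
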
*/ communicator(const MPI_Comm& comm) { - this->comm_ = - comm_manager(new MPI_Comm(comm), null_deleter{}); + this->comm_.reset(new MPI_Comm(comm)); this->size_ = get_num_ranks(); this->rank_ = get_my_rank(); this->node_local_rank_ = get_node_local_rank(); @@ -235,7 +236,7 @@ class communicator { { MPI_Comm comm_out; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm, color, key, &comm_out)); - this->comm_ = comm_manager(new MPI_Comm(comm_out), comm_deleter{}); + this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{}); this->size_ = get_num_ranks(); this->rank_ = get_my_rank(); this->node_local_rank_ = get_node_local_rank(); @@ -254,107 +255,29 @@ class communicator { MPI_Comm comm_out; GKO_ASSERT_NO_MPI_ERRORS( MPI_Comm_split(comm.get(), color, key, &comm_out)); - this->comm_ = comm_manager(new MPI_Comm(comm_out), comm_deleter{}); + this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{}); this->size_ = get_num_ranks(); this->rank_ = get_my_rank(); this->node_local_rank_ = get_node_local_rank(); } - /** - * Copy constructor. The underlying MPI_Comm object will be duplicated. - * - * @param other the object to be copied - */ - communicator(const communicator& other) - { - MPI_Comm comm; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm)); - this->comm_ = comm_manager(new MPI_Comm(comm), comm_deleter{}); - this->size_ = other.size_; - this->rank_ = other.rank_; - this->node_local_rank_ = other.node_local_rank_; - } + communicator(const communicator& other) = default; - /** - * Copy assignment operator. The underlying MPI_Comm object will be - * duplicated. - * - * @param other the object to be copied - */ - communicator& operator=(const communicator& other) - { - if (&other == this) { - return *this; - } - MPI_Comm comm; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(other.get(), &comm)); - this->comm_ = comm_manager(new MPI_Comm(comm), comm_deleter{}); - this->size_ = other.size_; - this->rank_ = other.rank_; - this->node_local_rank_ = other.node_local_rank_; - return *this; - } + communicator& operator=(const communicator& other) = default; - /** - * Move constructor. If we own the underlying communicator, then we move the - * object over. If we don't, then we throw. - * - * @param other the object to be moved - */ communicator(communicator&& other) { - if (other.is_owning()) { - this->comm_ = std::move(other.comm_); - this->size_ = other.size_; - this->rank_ = other.rank_; - this->node_local_rank_ = other.node_local_rank_; - other.size_ = 0; - other.rank_ = -1; - } else { - // If we don't own the communicator, then we can't move from it. - GKO_NOT_SUPPORTED(other); - } + this->comm_ = std::move(other.comm_); + other.comm_.reset(new MPI_Comm(MPI_COMM_NULL)); } - /** - * Move assignment operator. If we own the underlying communicator, then we - * move the object over. If we don't, then we throw. - * - * @param other the object to be moved - */ communicator& operator=(communicator&& other) { - if (&other == this) { - return *this; - } - if (other.is_owning()) { - this->comm_ = std::move(other.comm_); - this->size_ = other.size_; - this->rank_ = other.rank_; - this->node_local_rank_ = other.node_local_rank_; - other.size_ = 0; - other.rank_ = -1; - } else { - // If we don't own the communicator, then we can't move from it. - GKO_NOT_SUPPORTED(other); - } + this->comm_ = std::move(other.comm_); + other.comm_.reset(new MPI_Comm(MPI_COMM_NULL)); return *this; } - /** - * Duplicate and create an owning communicator from an input MPI_Comm - * object. 
- * - * @param comm_in the input MPI_Comm object to be duplicated - */ - static communicator duplicate(const MPI_Comm& comm_in) - { - MPI_Comm comm; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_dup(comm_in, &comm)); - communicator comm_out(comm); - return comm_out; - } - /** * Return the underlying MPI_Comm object. * @@ -390,16 +313,6 @@ class communicator { */ bool operator==(const communicator& rhs) { return compare(rhs.get()); } - /** - * Check if the underlying comm object is owned - * - * @return if the underlying comm object is owned - */ - bool is_owning() - { - return comm_.get_deleter().target_type() == typeid(comm_deleter); - } - /** * This function is used to synchronize the ranks in the communicator. * Calls MPI_Barrier @@ -407,9 +320,7 @@ class communicator { void synchronize() const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(get())); } private: - using comm_manager = - std::unique_ptr>; - comm_manager comm_; + std::shared_ptr comm_; int size_{}; int rank_{}; int node_local_rank_{}; @@ -443,7 +354,7 @@ class communicator { { int flag; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(get(), other, &flag)); - return flag; + return flag == MPI_IDENT; } }; From 509ca6cf35570dad6186cb200edec5f3ca204b3a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 26 Nov 2021 11:41:29 +0100 Subject: [PATCH 41/59] Move everything to member funcs of comm and window --- core/test/mpi/base/bindings.cpp | 80 +- include/ginkgo/core/base/mpi.hpp | 1238 +++++++++++++++--------------- 2 files changed, 638 insertions(+), 680 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index d95aa692295..346c95469b7 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -61,7 +61,7 @@ TYPED_TEST_SUITE(MpiBindings, gko::test::PODTypes, TypenameNameGenerator); TYPED_TEST(MpiBindings, CanSetADefaultwindow) { gko::mpi::window win; - ASSERT_EQ(win.get(), MPI_WIN_NULL); + ASSERT_EQ(win.get_window(), MPI_WIN_NULL); } @@ -71,7 +71,7 @@ TYPED_TEST(MpiBindings, CanCreatewindow) auto comm = gko::mpi::communicator(MPI_COMM_WORLD); auto win = gko::mpi::window(data.data(), 4 * sizeof(TypeParam), comm); - ASSERT_NE(win.get(), MPI_WIN_NULL); + ASSERT_NE(win.get_window(), MPI_WIN_NULL); win.lock_all(); win.unlock_all(); } @@ -87,12 +87,12 @@ TYPED_TEST(MpiBindings, CanSendAndRecvValues) auto send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::send(send_array.data(), 4, rank, 40 + rank, comm); + comm.send(send_array.data(), 4, rank, 40 + rank); } } } else { recv_array = gko::Array{this->ref, 4}; - gko::mpi::recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm); + comm.recv(recv_array.get_data(), 4, 0, 40 + my_rank); } if (my_rank != 0) { auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; @@ -115,14 +115,14 @@ TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - req1.emplace_back(gko::mpi::i_send(send_array.data(), 4, rank, - 40 + rank, comm)); + req1.emplace_back( + comm.i_send(send_array.data(), 4, rank, 40 + rank)); } } } else { recv_array = gko::Array{this->ref, 4}; - req2 = std::move( - gko::mpi::i_recv(recv_array.get_data(), 4, 0, 40 + my_rank, comm)); + req2 = + std::move(comm.i_recv(recv_array.get_data(), 4, 0, 40 + my_rank)); } if (my_rank == 0) { auto stat1 = gko::mpi::wait_all(req1); @@ -155,7 +155,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) win.lock_all(); for 
(auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + win.put(data.data(), 4, rank, 0, 4); } win.flush_local(0); win.flush(rank); @@ -186,7 +186,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithExclusiveLock) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { win.lock(rank, 0, window::lock_type::exclusive); - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + win.put(data.data(), 4, rank, 0, 4); win.flush(0); win.unlock(rank); } @@ -216,7 +216,7 @@ TYPED_TEST(MpiBindings, CanPutValuesWithFence) if (my_rank == 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::put(data.data(), 4, rank, 0, 4, win); + win.put(data.data(), 4, rank, 0, 4); } } } @@ -244,7 +244,7 @@ TYPED_TEST(MpiBindings, CanGetValuesWithLockAll) win.lock_all(); for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + win.get(data.data(), 4, 0, 0, 4); } } win.unlock_all(); @@ -272,7 +272,7 @@ TYPED_TEST(MpiBindings, CanGetValuesWithExclusiveLock) for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { win.lock(0, 0, window::lock_type::exclusive); - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + win.get(data.data(), 4, 0, 0, 4); win.unlock(0); } } @@ -300,7 +300,7 @@ TYPED_TEST(MpiBindings, CanGetValuesWithFence) if (my_rank != 0) { for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - gko::mpi::get(data.data(), 4, 0, 0, 4, win); + win.get(data.data(), 4, 0, 0, 4); } } } @@ -320,7 +320,7 @@ TYPED_TEST(MpiBindings, CanBroadcastValues) if (my_rank == 0) { array = gko::Array(this->ref, {2, 3, 1, 3, -1, 0, 3, 1}); } - gko::mpi::broadcast(array.get_data(), 8, 0, comm); + comm.broadcast(array.get_data(), 8, 0); auto comp_data = array.get_data(); ASSERT_EQ(comp_data[0], TypeParam{2}); ASSERT_EQ(comp_data[1], TypeParam{3}); @@ -349,9 +349,9 @@ TYPED_TEST(MpiBindings, CanReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::reduce(&data, &sum, 1, MPI_SUM, 0, comm); - gko::mpi::reduce(&data, &max, 1, MPI_MAX, 0, comm); - gko::mpi::reduce(&data, &min, 1, MPI_MIN, 0, comm); + comm.reduce(&data, &sum, 1, MPI_SUM, 0); + comm.reduce(&data, &max, 1, MPI_MAX, 0); + comm.reduce(&data, &min, 1, MPI_MIN, 0); if (my_rank == 0) { EXPECT_EQ(sum, TypeParam{16}); EXPECT_EQ(max, TypeParam{6}); @@ -375,7 +375,7 @@ TYPED_TEST(MpiBindings, CanAllReduceValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, &sum, 1, MPI_SUM, comm); + comm.all_reduce(&data, &sum, 1, MPI_SUM); ASSERT_EQ(sum, TypeParam{16}); } @@ -395,7 +395,7 @@ TYPED_TEST(MpiBindings, CanAllReduceValuesInPlace) } else if (my_rank == 3) { data = 6; } - gko::mpi::all_reduce(&data, 1, MPI_SUM, comm); + comm.all_reduce(&data, 1, MPI_SUM); ASSERT_EQ(data, TypeParam{16}); } @@ -411,8 +411,8 @@ TYPED_TEST(MpiBindings, CanScatterValues) gko::Array{this->ref, {2, 3, 1, 3, -1, 0, 3, 1}}; } auto scatter_into_array = gko::Array{this->ref, 2}; - gko::mpi::scatter(scatter_from_array.get_data(), 2, - scatter_into_array.get_data(), 2, 0, comm); + comm.scatter(scatter_from_array.get_data(), 2, + scatter_into_array.get_data(), 2, 0); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], TypeParam{2}); @@ -448,7 +448,7 @@ TYPED_TEST(MpiBindings, CanGatherValues) } auto gather_array = gko::Array{ this->ref, static_cast(num_ranks)}; - gko::mpi::gather(&data, 1, gather_array.get_data(), 1, 0, comm); + comm.gather(&data, 1, 
gather_array.get_data(), 1, 0); if (my_rank == 0) { ASSERT_EQ(gather_array.get_data()[0], TypeParam{3}); ASSERT_EQ(gather_array.get_data()[1], TypeParam{5}); @@ -483,10 +483,10 @@ TYPED_TEST(MpiBindings, CanScatterValuesWithDisplacements) } scatter_into_array = gko::Array{this->ref, static_cast(nelems)}; - gko::mpi::gather(&nelems, 1, s_counts.get_data(), 1, 0, comm); - gko::mpi::scatter_v(scatter_from_array.get_data(), s_counts.get_data(), - displacements.get_data(), scatter_into_array.get_data(), - nelems, 0, comm); + comm.gather(&nelems, 1, s_counts.get_data(), 1, 0); + comm.scatter_v(scatter_from_array.get_data(), s_counts.get_data(), + displacements.get_data(), scatter_into_array.get_data(), + nelems, 0); auto comp_data = scatter_into_array.get_data(); if (my_rank == 0) { ASSERT_EQ(comp_data[0], TypeParam{2}); @@ -534,10 +534,10 @@ TYPED_TEST(MpiBindings, CanGatherValuesWithDisplacements) gather_from_array = gko::Array{this->ref, {1, -4, 5}}; } - gko::mpi::gather(&nelems, 1, r_counts.get_data(), 1, 0, comm); - gko::mpi::gather_v(gather_from_array.get_data(), nelems, - gather_into_array.get_data(), r_counts.get_data(), - displacements.get_data(), 0, comm); + comm.gather(&nelems, 1, r_counts.get_data(), 1, 0); + comm.gather_v(gather_from_array.get_data(), nelems, + gather_into_array.get_data(), r_counts.get_data(), + displacements.get_data(), 0); auto comp_data = gather_into_array.get_data(); if (my_rank == 0) { auto ref_array = @@ -572,8 +572,7 @@ TYPED_TEST(MpiBindings, AllToAllWorksCorrectly) ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(send_array.get_data(), 1, recv_array.get_data(), 1, - comm); + comm.all_to_all(send_array.get_data(), 1, recv_array.get_data(), 1); GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -600,7 +599,7 @@ TYPED_TEST(MpiBindings, AllToAllInPlaceWorksCorrectly) ref_array = gko::Array(this->ref, {2, 2, 0, -2}); } - gko::mpi::all_to_all(recv_array.get_data(), 1, comm); + comm.all_to_all(recv_array.get_data(), 1); GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -651,10 +650,9 @@ TYPED_TEST(MpiBindings, AllToAllVWorksCorrectly) ref_array = gko::Array{this->ref, {0, 2, 3, 3}}; } - gko::mpi::all_to_all_v(send_array.get_data(), scounts_array.get_data(), - soffset_array.get_data(), recv_array.get_data(), - rcounts_array.get_data(), roffset_array.get_data(), - comm); + comm.all_to_all_v(send_array.get_data(), scounts_array.get_data(), + soffset_array.get_data(), recv_array.get_data(), + rcounts_array.get_data(), roffset_array.get_data()); GKO_ASSERT_ARRAY_EQ(recv_array, ref_array); } @@ -674,9 +672,9 @@ TYPED_TEST(MpiBindings, CanScanValues) } else if (my_rank == 3) { data = 6; } - gko::mpi::scan(&data, &sum, 1, MPI_SUM, comm); - gko::mpi::scan(&data, &max, 1, MPI_MAX, comm); - gko::mpi::scan(&data, &min, 1, MPI_MIN, comm); + comm.scan(&data, &sum, 1, MPI_SUM); + comm.scan(&data, &max, 1, MPI_MAX); + comm.scan(&data, &min, 1, MPI_MIN); if (my_rank == 0) { EXPECT_EQ(sum, TypeParam{3}); EXPECT_EQ(max, TypeParam{3}); diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 7c02ae36a71..0c2b3d328b7 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -319,6 +319,517 @@ class communicator { */ void synchronize() const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(get())); } + + /** + * Send (Blocking) data from calling process to destination rank. 
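
// Illustrative sketch, not part of the patch: a matched send/recv pair
// using the new member functions; buffer contents and the tag value are
// invented for the example.
//
//     std::vector<int> buf(4, comm.rank());
//     if (comm.rank() == 0) {
//         comm.send(buf.data(), 4, 1, 0);  // to rank 1, tag 0
//     } else if (comm.rank() == 1) {
//         comm.recv(buf.data(), 4, 0, 0);  // from rank 0, tag 0
//     }
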
+ * + * @param send_buffer the buffer to send + * @param send_count the number of elements to send + * @param destination_rank the rank to send the data to + * @param send_tag the tag for the send call + */ + template + void send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag) const + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, + type_impl::get_type(), + destination_rank, send_tag, get())); + } + + + /** + * Send (Non-blocking, Immediate return) data from calling process to + * destination rank. + * + * @param send_buffer the buffer to send + * @param send_count the number of elements to send + * @param destination_rank the rank to send the data to + * @param send_tag the tag for the send call + * + * @return the request handle for the send call + */ + template + MPI_Request i_send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag) const + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Isend(send_buffer, send_count, type_impl::get_type(), + destination_rank, send_tag, get(), &req)); + return req; + } + + + /** + * Receive data from source rank. + * + * @param recv_buffer the buffer to send + * @param recv_count the number of elements to send + * @param source_rank the rank to send the data to + * @param recv_tag the tag for the send call + * + * @return the status of completion of this call + */ + template + MPI_Status recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag) const + { + MPI_Status status; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Recv(recv_buffer, recv_count, type_impl::get_type(), + source_rank, recv_tag, get(), &status)); + return status; + } + + + /** + * Receive (Non-blocking, Immediate return) data from source rank. + * + * @param recv_buffer the buffer to send + * @param recv_count the number of elements to send + * @param source_rank the rank to send the data to + * @param recv_tag the tag for the send call + * @param req the request handle for the send call + * + * @return the request handle for the send call + */ + template + MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag) const + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, + type_impl::get_type(), + source_rank, recv_tag, get(), &req)); + return req; + } + + + /** + * Broadcast data from calling process to all ranks in the communicator + * + * @param buffer the buffer to broadcsat + * @param count the number of elements to broadcast + * @param root_rank the rank to broadcast from + */ + template + void broadcast(BroadcastType* buffer, int count, int root_rank) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, + type_impl::get_type(), + root_rank, get())); + } + + + /** + * Reduce data into root from all calling processes on the same + * communicator. + * + * @param send_buffer the buffer to reduce + * @param recv_buffer the reduced result + * @param count the number of elements to reduce + * @param operation the MPI_Op type reduce operation. + */ + template + void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, int root_rank) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, root_rank, get())); + } + + + /** + * Reduce data into root from all calling processes on the same + * communicator. 
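
// Illustrative sketch, not part of the patch: summing one value per rank
// onto rank 0 with the reduce member function.
//
//     int local = comm.rank() + 1;
//     int total = 0;
//     comm.reduce(&local, &total, 1, MPI_SUM, 0);
//     // on rank 0, total == n * (n + 1) / 2 for n ranks
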
+ * + * @param send_buffer the buffer to reduce + * @param recv_buffer the reduced result + * @param count the number of elements to reduce + * @param operation the MPI_Op type reduce operation. + * + * @return the request handle for the call + */ + template + MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, int root_rank) + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce( + send_buffer, recv_buffer, count, type_impl::get_type(), + operation, root_rank, get(), &req)); + return req; + } + + + /** + * Reduce data from all calling processes from all calling processes on same + * communicator. + * + * @param recv_buffer the data to reduce and the reduced result + * @param count the number of elements to reduce + * @param operation the MPI_Op type reduce operation. + */ + template + void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation) + { + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Allreduce(in_place(), recv_buffer, count, + type_impl::get_type(), operation, get())); + } + + + /** + * Reduce data from all calling processes from all calling processes on same + * communicator. + * + * @param recv_buffer the data to reduce and the reduced result + * @param count the number of elements to reduce + * @param operation the reduce operation. See @MPI_Op + * + * @return the request handle for the call + */ + template + MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, + MPI_Op operation) + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( + in_place(), recv_buffer, count, + type_impl::get_type(), operation, get(), &req)); + return req; + } + + + /** + * Reduce data from all calling processes from all calling processes on same + * communicator. + * + * @param send_buffer the data to reduce + * @param recv_buffer the reduced result + * @param count the number of elements to reduce + * @param operation the reduce operation. See @MPI_Op + */ + template + void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation) + { + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Allreduce(send_buffer, recv_buffer, count, + type_impl::get_type(), operation, get())); + } + + + /** + * Reduce data from all calling processes from all calling processes on same + * communicator. + * + * @param send_buffer the data to reduce + * @param recv_buffer the reduced result + * @param count the number of elements to reduce + * @param operation the reduce operation. See @MPI_Op + * + * @return the request handle for the call + */ + template + MPI_Request i_all_reduce(const ReduceType* send_buffer, + ReduceType* recv_buffer, int count, + MPI_Op operation) + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( + send_buffer, recv_buffer, count, type_impl::get_type(), + operation, get(), &req)); + return req; + } + + + /** + * Gather data onto the root rank from all ranks in the communicator. 
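
// Illustrative sketch, not part of the patch: collecting one value per
// rank on rank 0 with the gather member function.
//
//     int local = comm.rank();
//     std::vector<int> all(comm.size());
//     comm.gather(&local, 1, all.data(), 1, 0);
//     // on rank 0, all now holds {0, 1, ..., comm.size() - 1}
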
+     *
+     * @param send_buffer the buffer to gather from
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to gather into
+     * @param recv_count the number of elements to receive per rank
+     * @param root_rank the rank to gather into
+     */
+    template <typename SendType, typename RecvType>
+    void gather(const SendType* send_buffer, const int send_count,
+                RecvType* recv_buffer, const int recv_count, int root_rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(
+            MPI_Gather(send_buffer, send_count, type_impl<SendType>::get_type(),
+                       recv_buffer, recv_count, type_impl<RecvType>::get_type(),
+                       root_rank, get()));
+    }
+
+
+    /**
+     * Gather data onto the root rank from all ranks in the communicator with
+     * offsets.
+     *
+     * @param send_buffer the buffer to gather from
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to gather into
+     * @param recv_counts the number of elements to receive from each rank
+     * @param displacements the offsets into the receive buffer
+     * @param root_rank the rank to gather into
+     */
+    template <typename SendType, typename RecvType>
+    void gather_v(const SendType* send_buffer, const int send_count,
+                  RecvType* recv_buffer, const int* recv_counts,
+                  const int* displacements, int root_rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv(
+            send_buffer, send_count, type_impl<SendType>::get_type(),
+            recv_buffer, recv_counts, displacements,
+            type_impl<RecvType>::get_type(), root_rank, get()));
+    }
+
+
+    /**
+     * Gather data onto all ranks from all ranks in the communicator.
+     *
+     * @param send_buffer the buffer to gather from
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to gather into
+     * @param recv_count the number of elements to receive per rank
+     */
+    template <typename SendType, typename RecvType>
+    void all_gather(const SendType* send_buffer, const int send_count,
+                    RecvType* recv_buffer, const int recv_count)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(
+            send_buffer, send_count, type_impl<SendType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), get()));
+    }
+
+
+    /**
+     * Scatter data from root rank to all ranks in the communicator.
+     *
+     * @param send_buffer the buffer to scatter from
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to scatter into
+     * @param recv_count the number of elements to receive
+     * @param root_rank the rank to scatter from
+     */
+    template <typename SendType, typename RecvType>
+    void scatter(const SendType* send_buffer, const int send_count,
+                 RecvType* recv_buffer, const int recv_count, int root_rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(
+            send_buffer, send_count, type_impl<SendType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), root_rank,
+            get()));
+    }
+
+
+    /**
+     * Scatter data from root rank to all ranks in the communicator with
+     * offsets.
+     *
+     * @param send_buffer the buffer to scatter from
+     * @param send_counts the number of elements to send to each rank
+     * @param displacements the offsets into the send buffer
+     * @param recv_buffer the buffer to scatter into
+     * @param recv_count the number of elements to receive
+     * @param root_rank the rank to scatter from
+     */
+    template <typename SendType, typename RecvType>
+    void scatter_v(const SendType* send_buffer, const int* send_counts,
+                   const int* displacements, RecvType* recv_buffer,
+                   const int recv_count, int root_rank)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv(
+            send_buffer, send_counts, displacements,
+            type_impl<SendType>::get_type(), recv_buffer, recv_count,
+            type_impl<RecvType>::get_type(), root_rank, get()));
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks in place
+     * (MPI_Alltoall). See MPI documentation for more details.
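+     *
+     * A usage sketch (illustrative only): every rank sends its rank id to
+     * every other rank, reusing the same buffer:
+     * @code
+     * std::vector<int> buf(comm.size(), comm.rank());
+     * comm.all_to_all(buf.data(), 1);
+     * // buf now holds {0, 1, ..., comm.size() - 1} on every rank
+     * @endcode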
+     *
+     * @param recv_buffer the buffer to send from and receive into
+     * @param recv_count the number of elements to communicate per rank
+     *
+     * @note This overload uses MPI_IN_PLACE and the source and destination
+     * buffers are the same.
+     */
+    template <typename RecvType>
+    void all_to_all(RecvType* recv_buffer, const int recv_count)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
+            in_place<RecvType>(), recv_count, type_impl<RecvType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), get()));
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks in place
+     * (MPI_Alltoall). See MPI documentation for more details.
+     *
+     * @param recv_buffer the buffer to send from and receive into
+     * @param recv_count the number of elements to communicate per rank
+     *
+     * @return the request handle for the call
+     *
+     * @note This overload uses MPI_IN_PLACE and the source and destination
+     * buffers are the same.
+     */
+    template <typename RecvType>
+    MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count)
+    {
+        MPI_Request req;
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
+            in_place<RecvType>(), recv_count, type_impl<RecvType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), get(),
+            &req));
+        return req;
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks (MPI_Alltoall).
+     * See MPI documentation for more details.
+     *
+     * @param send_buffer the buffer to send
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to receive
+     * @param recv_count the number of elements to receive
+     */
+    template <typename SendType, typename RecvType>
+    void all_to_all(const SendType* send_buffer, const int send_count,
+                    RecvType* recv_buffer, const int recv_count)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
+            send_buffer, send_count, type_impl<SendType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), get()));
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks (MPI_Alltoall).
+     * See MPI documentation for more details.
+     *
+     * @param send_buffer the buffer to send
+     * @param send_count the number of elements to send
+     * @param recv_buffer the buffer to receive
+     * @param recv_count the number of elements to receive
+     *
+     * @return the request handle for the call
+     */
+    template <typename SendType, typename RecvType>
+    MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count,
+                             RecvType* recv_buffer, const int recv_count)
+    {
+        MPI_Request req;
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
+            send_buffer, send_count, type_impl<SendType>::get_type(),
+            recv_buffer, recv_count, type_impl<RecvType>::get_type(), get(),
+            &req));
+        return req;
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks with
+     * offsets (MPI_Alltoallv). See MPI documentation for more details.
+     *
+     * @param send_buffer the buffer to send
+     * @param send_counts the number of elements to send to each rank
+     * @param send_offsets the offsets into the send buffer
+     * @param recv_buffer the buffer to gather into
+     * @param recv_counts the number of elements to receive from each rank
+     * @param recv_offsets the offsets into the recv buffer
+     */
+    template <typename SendType, typename RecvType>
+    void all_to_all_v(const SendType* send_buffer, const int* send_counts,
+                      const int* send_offsets, RecvType* recv_buffer,
+                      const int* recv_counts, const int* recv_offsets)
+    {
+        GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv(
+            send_buffer, send_counts, send_offsets,
+            type_impl<SendType>::get_type(), recv_buffer, recv_counts,
+            recv_offsets, type_impl<RecvType>::get_type(), get()));
+    }
+
+
+    /**
+     * Communicate data from all ranks to all other ranks with
+     * offsets (MPI_Alltoallv). See MPI documentation for more details.
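+     *
+     * A usage sketch (illustrative only; assumes count and offset arrays
+     * set up consistently on every rank):
+     * @code
+     * MPI_Request req = comm.i_all_to_all_v(
+     *     send_buf.data(), send_counts.data(), send_offsets.data(),
+     *     recv_buf.data(), recv_counts.data(), recv_offsets.data());
+     * MPI_Wait(&req, MPI_STATUS_IGNORE);
+     * @endcode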
+ * + * @param send_buffer the buffer to send + * @param send_count the number of elements to send + * @param send_offsets the offsets for the send buffer + * @param recv_buffer the buffer to gather into + * @param recv_count the number of elements to receive + * @param recv_offsets the offsets for the recv buffer + * + * @return the request handle for the call + */ + template + MPI_Request i_all_to_all_v(const SendType* send_buffer, + const int* send_counts, const int* send_offsets, + RecvType* recv_buffer, const int* recv_counts, + const int* recv_offsets) + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( + send_buffer, send_counts, send_offsets, + type_impl::get_type(), recv_buffer, recv_counts, + recv_offsets, type_impl::get_type(), get(), &req)); + return req; + } + + + /** + * Does a scan operation with the given operator. + * (MPI_Scan). See MPI documentation for more details. + * + * @param send_buffer the buffer to scan from + * @param recv_buffer the result buffer + * @param recv_count the number of elements to scan + * @param operation the operation type to be used for the scan. See @MPI_Op + */ + template + void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, + MPI_Op operation) + { + GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, get())); + } + + + /** + * Does a scan operation with the given operator. + * (MPI_Scan). See MPI documentation for more details. + * + * @param send_buffer the buffer to scan from + * @param recv_buffer the result buffer + * @param recv_count the number of elements to scan + * @param operation the operation type to be used for the scan. See @MPI_Op + * + * @return the request handle for the call + */ + template + MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, + int count, MPI_Op operation) + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, + type_impl::get_type(), + operation, get(), &req)); + return req; + } + private: std::shared_ptr comm_; int size_{}; @@ -482,7 +993,7 @@ class window { * * @return the underlying window object. */ - MPI_Win get() { return this->window_; } + MPI_Win get_window() const { return this->window_; } /** * The active target synchronization using MPI_Win_fence for the window @@ -603,652 +1114,101 @@ class window { } } -private: - MPI_Win window_; -}; - - -/** - * Send (Blocking) data from calling process to destination rank. - * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param destination_rank the rank to send the data to - * @param send_tag the tag for the send call - * @param comm the communicator - */ -template -inline void send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Send(send_buffer, send_count, - type_impl::get_type(), - destination_rank, send_tag, comm.get())); -} - - -/** - * Send (Non-blocking, Immediate return) data from calling process to - * destination rank. 
- * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param destination_rank the rank to send the data to - * @param send_tag the tag for the send call - * @param comm the communicator - * - * @return the request handle for the send call - */ -template -inline MPI_Request i_send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag, - const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Isend(send_buffer, send_count, type_impl::get_type(), - destination_rank, send_tag, comm.get(), &req)); - return req; -} - - -/** - * Receive data from source rank. - * - * @param recv_buffer the buffer to send - * @param recv_count the number of elements to send - * @param source_rank the rank to send the data to - * @param recv_tag the tag for the send call - * @param comm the communicator - * - * @return the status of completion of this call - */ -template -inline MPI_Status recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag, - const communicator& comm) -{ - MPI_Status status; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Recv(recv_buffer, recv_count, type_impl::get_type(), - source_rank, recv_tag, comm.get(), &status)); - return status; -} - - -/** - * Receive (Non-blocking, Immediate return) data from source rank. - * - * @param recv_buffer the buffer to send - * @param recv_count the number of elements to send - * @param source_rank the rank to send the data to - * @param recv_tag the tag for the send call - * @param req the request handle for the send call - * @param comm the communicator - * - * @return the request handle for the send call - */ -template -inline MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag, - const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Irecv(recv_buffer, recv_count, type_impl::get_type(), - source_rank, recv_tag, comm.get(), &req)); - return req; -} - - -/** - * Put data into the target window. - * - * @param origin_buffer the buffer to send - * @param origin_count the number of elements to put - * @param target_rank the rank to put the data to - * @param target_disp the displacement at the target window - * @param target_count the request handle for the send call - * @param window the window to put the data into - */ -template -inline void put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Put(origin_buffer, origin_count, type_impl::get_type(), - target_rank, target_disp, target_count, - type_impl::get_type(), window.get())); -} - - -/** - * Put data into the target window. 
- * - * @param origin_buffer the buffer to send - * @param origin_count the number of elements to put - * @param target_rank the rank to put the data to - * @param target_disp the displacement at the target window - * @param target_count the request handle for the send call - * @param window the window to put the data into - * - * @return the request handle for the send call - */ -template -inline MPI_Request r_put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Rput(origin_buffer, origin_count, type_impl::get_type(), - target_rank, target_disp, target_count, - type_impl::get_type(), window.get(), &req)); - return req; -} - - -/** - * Get data from the target window. - * - * @param origin_buffer the buffer to send - * @param origin_count the number of elements to get - * @param target_rank the rank to get the data from - * @param target_disp the displacement at the target window - * @param target_count the request handle for the send call - * @param window the window to put the data into - */ -template -inline void get(GetType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Get(origin_buffer, origin_count, type_impl::get_type(), - target_rank, target_disp, target_count, - type_impl::get_type(), window.get())); -} - - -/** - * Get data (with handle) from the target window. - * - * @param origin_buffer the buffer to send - * @param origin_count the number of elements to get - * @param target_rank the rank to get the data from - * @param target_disp the displacement at the target window - * @param target_count the request handle for the send call - * @param window the window to put the data into - * - * @return the request handle for the send call - */ -template -inline MPI_Request r_get(GetType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count, window& window) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Rget(origin_buffer, origin_count, type_impl::get_type(), - target_rank, target_disp, target_count, - type_impl::get_type(), window, &req)); - return req; -} - - -/** - * Broadcast data from calling process to all ranks in the communicator - * - * @param buffer the buffer to broadcsat - * @param count the number of elements to broadcast - * @param root_rank the rank to broadcast from - * @param comm the communicator - */ -template -inline void broadcast(BroadcastType* buffer, int count, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, - type_impl::get_type(), - root_rank, comm.get())); -} - - -/** - * Reduce data into root from all calling processes on the same communicator. - * - * @param send_buffer the buffer to reduce - * @param recv_buffer the reduced result - * @param count the number of elements to reduce - * @param operation the MPI_Op type reduce operation. 
- * @param comm the communicator - */ -template -inline void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, - type_impl::get_type(), - operation, root_rank, comm.get())); -} - - -/** - * Reduce data into root from all calling processes on the same communicator. - * - * @param send_buffer the buffer to reduce - * @param recv_buffer the reduced result - * @param count the number of elements to reduce - * @param operation the MPI_Op type reduce operation. - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_reduce(const ReduceType* send_buffer, - ReduceType* recv_buffer, int count, - MPI_Op operation, int root_rank, - const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce( - send_buffer, recv_buffer, count, type_impl::get_type(), - operation, root_rank, comm.get(), &req)); - return req; -} - - -/** - * Reduce data from all calling processes from all calling processes on same - * communicator. - * - * @param recv_buffer the data to reduce and the reduced result - * @param count the number of elements to reduce - * @param operation the MPI_Op type reduce operation. - * @param comm the communicator - */ -template -inline void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce( - in_place(), recv_buffer, count, - type_impl::get_type(), operation, comm.get())); -} - - -/** - * Reduce data from all calling processes from all calling processes on same - * communicator. - * - * @param recv_buffer the data to reduce and the reduced result - * @param count the number of elements to reduce - * @param operation the reduce operation. See @MPI_Op - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, - MPI_Op operation, const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( - in_place(), recv_buffer, count, - type_impl::get_type(), operation, comm.get(), &req)); - return req; -} - - -/** - * Reduce data from all calling processes from all calling processes on same - * communicator. - * - * @param send_buffer the data to reduce - * @param recv_buffer the reduced result - * @param count the number of elements to reduce - * @param operation the reduce operation. See @MPI_Op - * @param comm the communicator - */ -template -inline void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(send_buffer, recv_buffer, count, - type_impl::get_type(), - operation, comm.get())); -} - - -/** - * Reduce data from all calling processes from all calling processes on same - * communicator. - * - * @param send_buffer the data to reduce - * @param recv_buffer the reduced result - * @param count the number of elements to reduce - * @param operation the reduce operation. 
See @MPI_Op - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_all_reduce(const ReduceType* send_buffer, - ReduceType* recv_buffer, int count, - MPI_Op operation, const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(send_buffer, recv_buffer, count, - type_impl::get_type(), - operation, comm.get(), &req)); - return req; -} - - -/** - * Gather data onto the root rank from all ranks in the communicator. - * - * @param send_buffer the buffer to gather from - * @param send_count the number of elements to send - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param root_rank the rank to gather into - * @param comm the communicator - */ -template -inline void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Gather( - send_buffer, send_count, type_impl::get_type(), recv_buffer, - recv_count, type_impl::get_type(), root_rank, comm.get())); -} - - -/** - * Gather data onto the root rank from all ranks in the communicator with - * offsets. - * - * @param send_buffer the buffer to gather from - * @param send_count the number of elements to send - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param displacements the offsets for the buffer - * @param root_rank the rank to gather into - * @param comm the communicator - */ -template -inline void gather_v(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Gatherv(send_buffer, send_count, type_impl::get_type(), - recv_buffer, recv_counts, displacements, - type_impl::get_type(), root_rank, comm.get())); -} - - -/** - * Gather data onto all ranks from all ranks in the communicator. - * - * @param send_buffer the buffer to gather from - * @param send_count the number of elements to send - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param comm the communicator - */ -template -inline void all_gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( - send_buffer, send_count, type_impl::get_type(), recv_buffer, - recv_count, type_impl::get_type(), comm.get())); -} - - -/** - * Scatter data from root rank to all ranks in the communicator. - * - * @param send_buffer the buffer to gather from - * @param send_count the number of elements to send - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param comm the communicator - */ -template -inline void scatter(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( - send_buffer, send_count, type_impl::get_type(), recv_buffer, - recv_count, type_impl::get_type(), root_rank, comm.get())); -} - - -/** - * Scatter data from root rank to all ranks in the communicator with offsets. 
- * - * @param send_buffer the buffer to gather from - * @param send_count the number of elements to send - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param displacements the offsets for the buffer - * @param comm the communicator - */ -template -inline void scatter_v(const SendType* send_buffer, const int* send_counts, - const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Scatterv(send_buffer, send_counts, displacements, - type_impl::get_type(), recv_buffer, recv_count, - type_impl::get_type(), root_rank, comm.get())); -} - - -/** - * Communicate data from all ranks to all other ranks in place (MPI_Alltoall). - * See MPI documentation for more details. - * - * @param buffer the buffer to send and the buffer receive - * @param recv_count the number of elements to receive - * @param comm the communicator - * - * @note This overload uses MPI_IN_PLACE and the source and destination buffers - * are the same. - */ -template -inline void all_to_all(RecvType* recv_buffer, const int recv_count, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( - in_place(), recv_count, type_impl::get_type(), - recv_buffer, recv_count, type_impl::get_type(), comm.get())); -} - - -/** - * Communicate data from all ranks to all other ranks in place (MPI_Alltoall). - * See MPI documentation for more details. - * - * @param buffer the buffer to send and the buffer receive - * @param recv_count the number of elements to receive - * @param comm the communicator - * - * @return the request handle for the call - * - * @note This overload uses MPI_IN_PLACE and the source and destination buffers - * are the same. - */ -template -inline MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count, - const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Ialltoall(in_place(), recv_count, - type_impl::get_type(), recv_buffer, recv_count, - type_impl::get_type(), comm.get(), &req)); - return req; -} - - -/** - * Communicate data from all ranks to all other ranks (MPI_Alltoall). - * See MPI documentation for more details. - * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param recv_buffer the buffer to receive - * @param recv_count the number of elements to receive - * @param comm the communicator - */ -template -inline void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( - send_buffer, send_count, type_impl::get_type(), recv_buffer, - recv_count, type_impl::get_type(), comm.get())); -} - - -/** - * Communicate data from all ranks to all other ranks (MPI_Alltoall). - * See MPI documentation for more details. 
- * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param recv_buffer the buffer to receive - * @param recv_count the number of elements to receive - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_all_to_all(const SendType* send_buffer, - const int send_count, RecvType* recv_buffer, - const int recv_count, const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( - send_buffer, send_count, type_impl::get_type(), recv_buffer, - recv_count, type_impl::get_type(), comm.get(), &req)); - return req; -} + /** + * Put data into the target window. + * + * @param origin_buffer the buffer to send + * @param origin_count the number of elements to put + * @param target_rank the rank to put the data to + * @param target_disp the displacement at the target window + * @param target_count the request handle for the send call + */ + template + void put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) + { + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Put(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), get_window())); + } -/** - * Communicate data from all ranks to all other ranks with - * offsets (MPI_Alltoallv). See MPI documentation for more details. - * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param send_offsets the offsets for the send buffer - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param recv_offsets the offsets for the recv buffer - * @param comm the communicator - */ -template -inline void all_to_all_v(const SendType* send_buffer, const int* send_counts, - const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets, - const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( - send_buffer, send_counts, send_offsets, type_impl::get_type(), - recv_buffer, recv_counts, recv_offsets, type_impl::get_type(), - comm.get())); -} + /** + * Put data into the target window. + * + * @param origin_buffer the buffer to send + * @param origin_count the number of elements to put + * @param target_rank the rank to put the data to + * @param target_disp the displacement at the target window + * @param target_count the request handle for the send call + * + * @return the request handle for the send call + */ + template + MPI_Request r_put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) const + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( + origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), get_window(), &req)); + return req; + } -/** - * Communicate data from all ranks to all other ranks with - * offsets (MPI_Alltoallv). See MPI documentation for more details. 
- * - * @param send_buffer the buffer to send - * @param send_count the number of elements to send - * @param send_offsets the offsets for the send buffer - * @param recv_buffer the buffer to gather into - * @param recv_count the number of elements to receive - * @param recv_offsets the offsets for the recv buffer - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_all_to_all_v(const SendType* send_buffer, - const int* send_counts, - const int* send_offsets, - RecvType* recv_buffer, const int* recv_counts, - const int* recv_offsets, - const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( - send_buffer, send_counts, send_offsets, type_impl::get_type(), - recv_buffer, recv_counts, recv_offsets, type_impl::get_type(), - comm.get(), &req)); - return req; -} + /** + * Get data from the target window. + * + * @param origin_buffer the buffer to send + * @param origin_count the number of elements to get + * @param target_rank the rank to get the data from + * @param target_disp the displacement at the target window + * @param target_count the request handle for the send call + */ + template + void get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) const + { + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Get(origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), get_window())); + } -/** - * Does a scan operation with the given operator. - * (MPI_Scan). See MPI documentation for more details. - * - * @param send_buffer the buffer to scan from - * @param recv_buffer the result buffer - * @param recv_count the number of elements to scan - * @param operation the operation type to be used for the scan. See @MPI_Op - * @param comm the communicator - * @param req the request handle - */ -template -inline void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - MPI_Op operation, const communicator& comm) -{ - GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, - type_impl::get_type(), - operation, comm.get())); -} + /** + * Get data (with handle) from the target window. + * + * @param origin_buffer the buffer to send + * @param origin_count the number of elements to get + * @param target_rank the rank to get the data from + * @param target_disp the displacement at the target window + * @param target_count the request handle for the send call + * + * @return the request handle for the send call + */ + template + MPI_Request r_get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) const + { + MPI_Request req; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( + origin_buffer, origin_count, type_impl::get_type(), + target_rank, target_disp, target_count, + type_impl::get_type(), get_window(), &req)); + return req; + } -/** - * Does a scan operation with the given operator. - * (MPI_Scan). See MPI documentation for more details. - * - * @param send_buffer the buffer to scan from - * @param recv_buffer the result buffer - * @param recv_count the number of elements to scan - * @param operation the operation type to be used for the scan. 
See @MPI_Op - * @param comm the communicator - * - * @return the request handle for the call - */ -template -inline MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, - int count, MPI_Op operation, const communicator& comm) -{ - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, - type_impl::get_type(), - operation, comm.get(), &req)); - return req; -} +private: + MPI_Win window_; +}; } // namespace mpi From e913f55eca90dd49935b28d66dce8a34e968abdd Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 27 Nov 2021 09:58:44 +0100 Subject: [PATCH 42/59] Add request and status wrappers --- core/test/mpi/base/bindings.cpp | 20 +-- include/ginkgo/core/base/mpi.hpp | 283 +++++++++++++++++-------------- 2 files changed, 160 insertions(+), 143 deletions(-) diff --git a/core/test/mpi/base/bindings.cpp b/core/test/mpi/base/bindings.cpp index 346c95469b7..523f830eba3 100644 --- a/core/test/mpi/base/bindings.cpp +++ b/core/test/mpi/base/bindings.cpp @@ -109,28 +109,24 @@ TYPED_TEST(MpiBindings, CanNonBlockingSendAndNonBlockingRecvValues) std::vector send_array; auto recv_array = gko::Array{this->ref}; TypeParam* data; - std::vector req1; - MPI_Request req2; + auto req1 = std::vector(num_ranks); + auto req2 = gko::mpi::request(); if (my_rank == 0) { send_array = std::vector{1, 2, 3, 4}; for (auto rank = 0; rank < num_ranks; ++rank) { if (rank != my_rank) { - req1.emplace_back( - comm.i_send(send_array.data(), 4, rank, 40 + rank)); + req1[rank] = comm.i_send(send_array.data(), 4, rank, 40 + rank); } } } else { recv_array = gko::Array{this->ref, 4}; - req2 = - std::move(comm.i_recv(recv_array.get_data(), 4, 0, 40 + my_rank)); + req2 = comm.i_recv(recv_array.get_data(), 4, 0, 40 + my_rank); } if (my_rank == 0) { - auto stat1 = gko::mpi::wait_all(req1); + auto stat1 = wait_all(req1); } else { - auto stat2 = gko::mpi::wait(req2); - int count; - MPI_Get_count(&stat2, gko::mpi::type_impl::get_type(), - &count); + auto stat2 = req2.wait(); + auto count = stat2.get_count(recv_array.get_data()); ASSERT_EQ(count, 4); auto ref_array = gko::Array{this->ref, {1, 2, 3, 4}}; GKO_ASSERT_ARRAY_EQ(ref_array, recv_array); @@ -157,8 +153,6 @@ TYPED_TEST(MpiBindings, CanPutValuesWithLockAll) if (rank != my_rank) { win.put(data.data(), 4, rank, 0, 4); } - win.flush_local(0); - win.flush(rank); } win.unlock_all(); } diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 0c2b3d328b7..fb0fb772fb1 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -201,6 +201,99 @@ class comm_deleter { } // namespace +/** + * The status struct is a light wrapper around the MPI_Status struct. + */ +struct status { + /** + * The default constructor. It creates an empty MPI_Status + */ + status() : status_(MPI_Status{}) {} + + /** + * Get a pointer to the underlying MPI_Status object. + * + * @return a pointer to MPI_Status object + */ + MPI_Status* get() { return &this->status_; } + + /** + * Get the count of the number of elements received by the communication + * call. + * + * @tparam T The datatype of the object that was received. + * + * @param data The data object of type T that was received. + * + * @return the count + */ + template + int get_count(const T* data) const + { + int count; + MPI_Get_count(&status_, type_impl::get_type(), &count); + return count; + } + +private: + MPI_Status status_; +}; + + +/** + * The request class is a light wrapper around the MPI_Request handle class. 
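+ * A default-constructed request holds MPI_REQUEST_NULL; wait() completes
+ * the request and returns a status wrapper that can be queried.
+ *
+ * A usage sketch (illustrative only; it mirrors the test shown above):
+ * @code
+ * auto req = comm.i_recv(recv_buffer.data(), 4, 0, tag);
+ * auto st = req.wait();
+ * int count = st.get_count(recv_buffer.data());
+ * @endcode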
+ */ +class request { +public: + /** + * The default constructor. It creates a null MPI_Request of + * MPI_REQUEST_NULL type. + */ + request() : req_(MPI_REQUEST_NULL) {} + + /** + * Get a pointer to the underlying MPI_Request handle. + * + * @return a pointer to MPI_Request handle + */ + MPI_Request* get() { return &this->req_; } + + /** + * Allows a rank to wait on a particular request handle. + * + * @param req The request to wait on. + * @param status The status variable that can be queried. + */ + status wait() + { + status status; + GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(&req_, status.get())); + return status; + } + + +private: + MPI_Request req_; +}; + + +/** + * Allows a rank to wait on multiple request handles. + * + * @param req The vector of request handles to be waited on. + * + * @return status The vector of status objects that can be queried. + */ +std::vector wait_all(std::vector& req) +{ + std::vector stat; + for (auto i = 0; i < req.size(); ++i) { + stat.emplace_back(req[i].wait()); + } + return stat; +} + + /** * A communicator class that takes in the given communicator and duplicates it * for our purposes. As the class or object goes out of scope, the communicator @@ -261,23 +354,6 @@ class communicator { this->node_local_rank_ = get_node_local_rank(); } - communicator(const communicator& other) = default; - - communicator& operator=(const communicator& other) = default; - - communicator(communicator&& other) - { - this->comm_ = std::move(other.comm_); - other.comm_.reset(new MPI_Comm(MPI_COMM_NULL)); - } - - communicator& operator=(communicator&& other) - { - this->comm_ = std::move(other.comm_); - other.comm_.reset(new MPI_Comm(MPI_COMM_NULL)); - return *this; - } - /** * Return the underlying MPI_Comm object. * @@ -307,19 +383,25 @@ class communicator { int node_local_rank() const { return node_local_rank_; }; /** - * Compare two communicator objects. + * Compare two communicator objects for equality. * * @return if the two comm objects are equal */ bool operator==(const communicator& rhs) { return compare(rhs.get()); } + /** + * Compare two communicator objects for non-equality. + * + * @return if the two comm objects are not equal + */ + bool operator!=(const communicator& rhs) { return !(*this == rhs); } + /** * This function is used to synchronize the ranks in the communicator. * Calls MPI_Barrier */ void synchronize() const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(get())); } - /** * Send (Blocking) data from calling process to destination rank. * @@ -337,7 +419,6 @@ class communicator { destination_rank, send_tag, get())); } - /** * Send (Non-blocking, Immediate return) data from calling process to * destination rank. @@ -350,17 +431,16 @@ class communicator { * @return the request handle for the send call */ template - MPI_Request i_send(const SendType* send_buffer, const int send_count, - const int destination_rank, const int send_tag) const + request i_send(const SendType* send_buffer, const int send_count, + const int destination_rank, const int send_tag) const { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS( MPI_Isend(send_buffer, send_count, type_impl::get_type(), - destination_rank, send_tag, get(), &req)); + destination_rank, send_tag, get(), req.get())); return req; } - /** * Receive data from source rank. 
* @@ -372,17 +452,16 @@ class communicator { * @return the status of completion of this call */ template - MPI_Status recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag) const + status recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag) const { - MPI_Status status; + status st; GKO_ASSERT_NO_MPI_ERRORS( MPI_Recv(recv_buffer, recv_count, type_impl::get_type(), - source_rank, recv_tag, get(), &status)); - return status; + source_rank, recv_tag, get(), st.get())); + return st; } - /** * Receive (Non-blocking, Immediate return) data from source rank. * @@ -395,17 +474,16 @@ class communicator { * @return the request handle for the send call */ template - MPI_Request i_recv(RecvType* recv_buffer, const int recv_count, - const int source_rank, const int recv_tag) const + request i_recv(RecvType* recv_buffer, const int recv_count, + const int source_rank, const int recv_tag) const { - MPI_Request req; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Irecv(recv_buffer, recv_count, - type_impl::get_type(), - source_rank, recv_tag, get(), &req)); + request req; + GKO_ASSERT_NO_MPI_ERRORS( + MPI_Irecv(recv_buffer, recv_count, type_impl::get_type(), + source_rank, recv_tag, get(), req.get())); return req; } - /** * Broadcast data from calling process to all ranks in the communicator * @@ -421,7 +499,6 @@ class communicator { root_rank, get())); } - /** * Reduce data into root from all calling processes on the same * communicator. @@ -440,7 +517,6 @@ class communicator { operation, root_rank, get())); } - /** * Reduce data into root from all calling processes on the same * communicator. @@ -453,17 +529,16 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, int root_rank) + request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation, int root_rank) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce( send_buffer, recv_buffer, count, type_impl::get_type(), - operation, root_rank, get(), &req)); + operation, root_rank, get(), req.get())); return req; } - /** * Reduce data from all calling processes from all calling processes on same * communicator. @@ -480,7 +555,6 @@ class communicator { type_impl::get_type(), operation, get())); } - /** * Reduce data from all calling processes from all calling processes on same * communicator. @@ -492,17 +566,15 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_all_reduce(ReduceType* recv_buffer, int count, - MPI_Op operation) + request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( in_place(), recv_buffer, count, - type_impl::get_type(), operation, get(), &req)); + type_impl::get_type(), operation, get(), req.get())); return req; } - /** * Reduce data from all calling processes from all calling processes on same * communicator. @@ -521,7 +593,6 @@ class communicator { type_impl::get_type(), operation, get())); } - /** * Reduce data from all calling processes from all calling processes on same * communicator. 
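      *
      * A usage sketch (illustrative only; at this point i_all_reduce
      * returns a request wrapper):
      * @code
      * auto req = comm.i_all_reduce(&local, &global, 1, MPI_SUM);
      * auto st = req.wait();
      * @endcode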
@@ -534,18 +605,16 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_all_reduce(const ReduceType* send_buffer, - ReduceType* recv_buffer, int count, - MPI_Op operation) + request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, + int count, MPI_Op operation) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( send_buffer, recv_buffer, count, type_impl::get_type(), - operation, get(), &req)); + operation, get(), req.get())); return req; } - /** * Gather data onto the root rank from all ranks in the communicator. * @@ -565,7 +634,6 @@ class communicator { root_rank, get())); } - /** * Gather data onto the root rank from all ranks in the communicator with * offsets. @@ -588,7 +656,6 @@ class communicator { type_impl::get_type(), root_rank, get())); } - /** * Gather data onto all ranks from all ranks in the communicator. * @@ -606,7 +673,6 @@ class communicator { recv_buffer, recv_count, type_impl::get_type(), get())); } - /** * Scatter data from root rank to all ranks in the communicator. * @@ -625,7 +691,6 @@ class communicator { get())); } - /** * Scatter data from root rank to all ranks in the communicator with * offsets. @@ -648,7 +713,6 @@ class communicator { type_impl::get_type(), root_rank, get())); } - /** * Communicate data from all ranks to all other ranks in place * (MPI_Alltoall). See MPI documentation for more details. @@ -668,7 +732,6 @@ class communicator { recv_buffer, recv_count, type_impl::get_type(), get())); } - /** * Communicate data from all ranks to all other ranks in place * (MPI_Alltoall). See MPI documentation for more details. @@ -683,17 +746,16 @@ class communicator { * buffers are the same. */ template - MPI_Request i_all_to_all(RecvType* recv_buffer, const int recv_count) + request i_all_to_all(RecvType* recv_buffer, const int recv_count) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( in_place(), recv_count, type_impl::get_type(), recv_buffer, recv_count, type_impl::get_type(), get(), - &req)); + req.get())); return req; } - /** * Communicate data from all ranks to all other ranks (MPI_Alltoall). * See MPI documentation for more details. @@ -712,7 +774,6 @@ class communicator { recv_buffer, recv_count, type_impl::get_type(), get())); } - /** * Communicate data from all ranks to all other ranks (MPI_Alltoall). * See MPI documentation for more details. @@ -725,18 +786,17 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count) + request i_all_to_all(const SendType* send_buffer, const int send_count, + RecvType* recv_buffer, const int recv_count) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( send_buffer, send_count, type_impl::get_type(), recv_buffer, recv_count, type_impl::get_type(), get(), - &req)); + req.get())); return req; } - /** * Communicate data from all ranks to all other ranks with * offsets (MPI_Alltoallv). See MPI documentation for more details. @@ -760,7 +820,6 @@ class communicator { recv_offsets, type_impl::get_type(), get())); } - /** * Communicate data from all ranks to all other ranks with * offsets (MPI_Alltoallv). See MPI documentation for more details. 
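      *
      * A usage sketch (illustrative only; the returned request wrapper is
      * completed with wait()):
      * @code
      * auto req = comm.i_all_to_all_v(send.data(), send_counts.data(),
      *                                send_offsets.data(), recv.data(),
      *                                recv_counts.data(), recv_offsets.data());
      * req.wait();
      * @endcode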
@@ -775,20 +834,18 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_all_to_all_v(const SendType* send_buffer, - const int* send_counts, const int* send_offsets, - RecvType* recv_buffer, const int* recv_counts, - const int* recv_offsets) + request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, + const int* send_offsets, RecvType* recv_buffer, + const int* recv_counts, const int* recv_offsets) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( send_buffer, send_counts, send_offsets, type_impl::get_type(), recv_buffer, recv_counts, - recv_offsets, type_impl::get_type(), get(), &req)); + recv_offsets, type_impl::get_type(), get(), req.get())); return req; } - /** * Does a scan operation with the given operator. * (MPI_Scan). See MPI documentation for more details. @@ -807,7 +864,6 @@ class communicator { operation, get())); } - /** * Does a scan operation with the given operator. * (MPI_Scan). See MPI documentation for more details. @@ -820,13 +876,13 @@ class communicator { * @return the request handle for the call */ template - MPI_Request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, - int count, MPI_Op operation) + request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, + int count, MPI_Op operation) { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, type_impl::get_type(), - operation, get(), &req)); + operation, get(), req.get())); return req; } @@ -878,35 +934,6 @@ class communicator { inline double get_walltime() { return MPI_Wtime(); } -/** - * Allows a rank to wait on a particular request handle. - * - * @param req The request to wait on. - * @param status The status variable that can be queried. - */ -inline MPI_Status wait(MPI_Request& req) -{ - MPI_Status status; - GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(&req, &status)); - return status; -} - - -/** - * Allows a rank to wait on multiple request handles. - * - * @param req The request handles to wait on. - * @param status The status variable that can be queried. - */ -inline std::vector wait_all(std::vector& req) -{ - std::vector status(req.size()); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Waitall(req.size(), req.data(), status.data())); - return status; -} - - /** * This class wraps the MPI_Window class with RAII functionality. Different * create and lock type methods are setup with enums. @@ -1114,7 +1141,6 @@ class window { } } - /** * Put data into the target window. * @@ -1135,7 +1161,6 @@ class window { type_impl::get_type(), get_window())); } - /** * Put data into the target window. * @@ -1148,19 +1173,18 @@ class window { * @return the request handle for the send call */ template - MPI_Request r_put(const PutType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count) const + request r_put(const PutType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) const { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput( origin_buffer, origin_count, type_impl::get_type(), target_rank, target_disp, target_count, - type_impl::get_type(), get_window(), &req)); + type_impl::get_type(), get_window(), req.get())); return req; } - /** * Get data from the target window. * @@ -1181,7 +1205,6 @@ class window { type_impl::get_type(), get_window())); } - /** * Get data (with handle) from the target window. 
* @@ -1194,15 +1217,15 @@ class window { * @return the request handle for the send call */ template - MPI_Request r_get(GetType* origin_buffer, const int origin_count, - const int target_rank, const unsigned int target_disp, - const int target_count) const + request r_get(GetType* origin_buffer, const int origin_count, + const int target_rank, const unsigned int target_disp, + const int target_count) const { - MPI_Request req; + request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget( origin_buffer, origin_count, type_impl::get_type(), target_rank, target_disp, target_count, - type_impl::get_type(), get_window(), &req)); + type_impl::get_type(), get_window(), req.get())); return req; } From 37b8a445a8847a08b3fac1360049620dcc720609 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 27 Nov 2021 11:25:51 +0100 Subject: [PATCH 43/59] Review updates. Co-authored-by: Tobias Ribizel Co-authored-by: Marcel Koch Co-authored-by: Aditya Kashi --- CMakeLists.txt | 8 --- cmake/GinkgoConfig.cmake.in | 5 +- core/mpi/get_info.cmake | 2 - core/test/mpi/base/communicator.cpp | 3 +- include/ginkgo/config.hpp.in | 8 +-- include/ginkgo/core/base/mpi.hpp | 93 ++++++++++++----------------- 6 files changed, 44 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bae049c42ac..388ca9d1df8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,6 @@ option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) option(GINKGO_BUILD_MPI "Compile the MPI module" ${GINKGO_HAS_MPI}) -option(GINKGO_FORCE_GPU_AWARE_MPI "Force the GPU Aware functionality to be enabled" OFF) option(GINKGO_BUILD_DPCPP "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) @@ -191,15 +190,8 @@ else() message(STATUS "HWLOC is being forcibly switched off") endif() -set(GINKGO_HAVE_MPI 0) -set(GINKGO_HAVE_GPU_AWARE_MPI 0) if(GINKGO_BUILD_MPI) find_package(MPI REQUIRED) - set(GINKGO_HAVE_MPI 1) - set(GINKGO_HAVE_GPU_AWARE_MPI 0) - if(GINKGO_FORCE_GPU_AWARE_MPI) - set(GINKGO_HAVE_GPU_AWARE_MPI 1) - endif() endif() # We keep using NVCC/HCC for consistency with previous releases even if AMD diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index ec79a229ad4..114cbdbaa21 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -82,8 +82,7 @@ set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@) set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) -set(GINKGO_HAVE_MPI @GINKGO_HAVE_MPI@) -set(GINKGO_HAVE_CUDA_AWARE_MPI @GINKGO_HAVE_CUDA_AWARE_MPI@) +set(GINKGO_BUILD_MPI @GINKGO_BUILD_MPI@) set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) @@ -155,7 +154,7 @@ if(GINKGO_HAVE_HWLOC) endif() # Check for MPI if it is enabled -if(GINKGO_HAVE_MPI) +if(GINKGO_BUILD_MPI) find_package(MPI REQUIRED) endif() diff --git a/core/mpi/get_info.cmake b/core/mpi/get_info.cmake index 2a735a46504..247523ee982 100644 --- a/core/mpi/get_info.cmake +++ b/core/mpi/get_info.cmake @@ -1,6 +1,4 @@ ginkgo_print_module_header(${detailed_log} "MPI") -ginkgo_print_variable(${detailed_log} "GINKGO_FORCE_CUDA_AWARE_MPI") -ginkgo_print_variable(${detailed_log} "GKO_CUDA_AWARE_RUN_STATUS") ginkgo_print_variable(${detailed_log} "MPI_C_COMPILER") ginkgo_print_variable(${detailed_log} "MPI_CXX_COMPILER") ginkgo_print_variable(${detailed_log} 
"MPI_CXX_COMPILE_OPTIONS") diff --git a/core/test/mpi/base/communicator.cpp b/core/test/mpi/base/communicator.cpp index 19db40b9773..d72a217957e 100644 --- a/core/test/mpi/base/communicator.cpp +++ b/core/test/mpi/base/communicator.cpp @@ -114,7 +114,8 @@ TEST_F(Communicator, CommunicatorCanBeMoveConstructed) TEST_F(Communicator, CommunicatorCanBeMoveAssigned) { gko::mpi::communicator comm2(MPI_COMM_WORLD); - gko::mpi::communicator copy = std::move(comm2); + gko::mpi::communicator copy(MPI_COMM_NULL); + copy = std::move(comm2); EXPECT_TRUE(copy == comm); } diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 061914f8f57..8512e021f5d 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -84,13 +84,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* Is MPI available ? */ // clang-format off -#define GKO_HAVE_MPI @GINKGO_HAVE_MPI@ -// clang-format on - - -/* Is GPU-aware MPI available ? */ -// clang-format off -#define GKO_HAVE_GPU_AWARE_MPI @GINKGO_HAVE_GPU_AWARE_MPI@ +#cmakedefine01 GINKGO_BUILD_MPI // clang-format on diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index fb0fb772fb1..b268b120eda 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -48,7 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#if GKO_HAVE_MPI +#if GINKGO_BUILD_MPI #include @@ -131,7 +131,7 @@ class environment { * * @return the provided thread support */ - int get_provided_thread_support() { return provided_thread_support_; } + int get_provided_thread_support() const { return provided_thread_support_; } /** * Call MPI_Init_thread and initialize the MPI environment @@ -166,19 +166,6 @@ class environment { }; -/** - * Returns if GPU aware functionality has been enabled - */ -static bool is_gpu_aware() -{ -#if GKO_HAVE_GPU_AWARE_MPI - return true; -#else - return false; -#endif -} - - namespace { @@ -284,7 +271,7 @@ class request { * * @return status The vector of status objects that can be queried. */ -std::vector wait_all(std::vector& req) +inline std::vector wait_all(std::vector& req) { std::vector stat; for (auto i = 0; i < req.size(); ++i) { @@ -312,9 +299,6 @@ class communicator { communicator(const MPI_Comm& comm) { this->comm_.reset(new MPI_Comm(comm)); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->node_local_rank_ = get_node_local_rank(); } /** @@ -330,9 +314,6 @@ class communicator { MPI_Comm comm_out; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm, color, key, &comm_out)); this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{}); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->node_local_rank_ = get_node_local_rank(); } /** @@ -349,9 +330,6 @@ class communicator { GKO_ASSERT_NO_MPI_ERRORS( MPI_Comm_split(comm.get(), color, key, &comm_out)); this->comm_.reset(new MPI_Comm(comm_out), comm_deleter{}); - this->size_ = get_num_ranks(); - this->rank_ = get_my_rank(); - this->node_local_rank_ = get_node_local_rank(); } /** @@ -366,35 +344,38 @@ class communicator { * * @return the size */ - int size() const { return size_; } + int size() const { return get_num_ranks(); } /** * Return the rank of the calling process in the communicator. * * @return the rank */ - int rank() const { return rank_; }; + int rank() const { return get_my_rank(); }; /** * Return the node local rank of the calling process in the communicator. 
* * @return the node local rank */ - int node_local_rank() const { return node_local_rank_; }; + int node_local_rank() const { return get_node_local_rank(); }; /** * Compare two communicator objects for equality. * * @return if the two comm objects are equal */ - bool operator==(const communicator& rhs) { return compare(rhs.get()); } + bool operator==(const communicator& rhs) const + { + return compare(rhs.get()); + } /** * Compare two communicator objects for non-equality. * * @return if the two comm objects are not equal */ - bool operator!=(const communicator& rhs) { return !(*this == rhs); } + bool operator!=(const communicator& rhs) const { return !(*this == rhs); } /** * This function is used to synchronize the ranks in the communicator. @@ -492,7 +473,7 @@ class communicator { * @param root_rank the rank to broadcast from */ template - void broadcast(BroadcastType* buffer, int count, int root_rank) + void broadcast(BroadcastType* buffer, int count, int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count, type_impl::get_type(), @@ -510,7 +491,7 @@ class communicator { */ template void reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, int root_rank) + int count, MPI_Op operation, int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count, type_impl::get_type(), @@ -530,7 +511,7 @@ class communicator { */ template request i_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation, int root_rank) + int count, MPI_Op operation, int root_rank) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce( @@ -548,7 +529,7 @@ class communicator { * @param operation the MPI_Op type reduce operation. */ template - void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation) + void all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation) const { GKO_ASSERT_NO_MPI_ERRORS( MPI_Allreduce(in_place(), recv_buffer, count, @@ -566,7 +547,8 @@ class communicator { * @return the request handle for the call */ template - request i_all_reduce(ReduceType* recv_buffer, int count, MPI_Op operation) + request i_all_reduce(ReduceType* recv_buffer, int count, + MPI_Op operation) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( @@ -586,7 +568,7 @@ class communicator { */ template void all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation) + int count, MPI_Op operation) const { GKO_ASSERT_NO_MPI_ERRORS( MPI_Allreduce(send_buffer, recv_buffer, count, @@ -606,7 +588,7 @@ class communicator { */ template request i_all_reduce(const ReduceType* send_buffer, ReduceType* recv_buffer, - int count, MPI_Op operation) + int count, MPI_Op operation) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce( @@ -626,7 +608,8 @@ class communicator { */ template void gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank) + RecvType* recv_buffer, const int recv_count, + int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS( MPI_Gather(send_buffer, send_count, type_impl::get_type(), @@ -648,7 +631,7 @@ class communicator { template void gather_v(const SendType* send_buffer, const int send_count, RecvType* recv_buffer, const int* recv_counts, - const int* displacements, int root_rank) + const int* displacements, int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv( send_buffer, send_count, type_impl::get_type(), @@ -666,7 +649,7 @@ class communicator { */ template 
void all_gather(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count) + RecvType* recv_buffer, const int recv_count) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather( send_buffer, send_count, type_impl::get_type(), @@ -683,7 +666,8 @@ class communicator { */ template void scatter(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count, int root_rank) + RecvType* recv_buffer, const int recv_count, + int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter( send_buffer, send_count, type_impl::get_type(), @@ -705,7 +689,7 @@ class communicator { template void scatter_v(const SendType* send_buffer, const int* send_counts, const int* displacements, RecvType* recv_buffer, - const int recv_count, int root_rank) + const int recv_count, int root_rank) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv( send_buffer, send_counts, displacements, @@ -725,7 +709,7 @@ class communicator { * buffers are the same. */ template - void all_to_all(RecvType* recv_buffer, const int recv_count) + void all_to_all(RecvType* recv_buffer, const int recv_count) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( in_place(), recv_count, type_impl::get_type(), @@ -746,7 +730,7 @@ class communicator { * buffers are the same. */ template - request i_all_to_all(RecvType* recv_buffer, const int recv_count) + request i_all_to_all(RecvType* recv_buffer, const int recv_count) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( @@ -767,7 +751,7 @@ class communicator { */ template void all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count) + RecvType* recv_buffer, const int recv_count) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall( send_buffer, send_count, type_impl::get_type(), @@ -787,7 +771,7 @@ class communicator { */ template request i_all_to_all(const SendType* send_buffer, const int send_count, - RecvType* recv_buffer, const int recv_count) + RecvType* recv_buffer, const int recv_count) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall( @@ -812,7 +796,7 @@ class communicator { template void all_to_all_v(const SendType* send_buffer, const int* send_counts, const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets) + const int* recv_counts, const int* recv_offsets) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv( send_buffer, send_counts, send_offsets, @@ -836,7 +820,8 @@ class communicator { template request i_all_to_all_v(const SendType* send_buffer, const int* send_counts, const int* send_offsets, RecvType* recv_buffer, - const int* recv_counts, const int* recv_offsets) + const int* recv_counts, + const int* recv_offsets) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv( @@ -857,7 +842,7 @@ class communicator { */ template void scan(const ScanType* send_buffer, ScanType* recv_buffer, int count, - MPI_Op operation) + MPI_Op operation) const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count, type_impl::get_type(), @@ -877,7 +862,7 @@ class communicator { */ template request i_scan(const ScanType* send_buffer, ScanType* recv_buffer, - int count, MPI_Op operation) + int count, MPI_Op operation) const { request req; GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count, @@ -892,14 +877,14 @@ class communicator { int rank_{}; int node_local_rank_{}; - int get_my_rank() + int get_my_rank() const { int my_rank = 0; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(get(), &my_rank)); return my_rank; } - int 
get_node_local_rank() + int get_node_local_rank() const { MPI_Comm local_comm; int rank; @@ -910,7 +895,7 @@ class communicator { return rank; } - int get_num_ranks() + int get_num_ranks() const { int size = 1; GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(get(), &size)); @@ -1153,7 +1138,7 @@ class window { template void put(const PutType* origin_buffer, const int origin_count, const int target_rank, const unsigned int target_disp, - const int target_count) + const int target_count) const { GKO_ASSERT_NO_MPI_ERRORS( MPI_Put(origin_buffer, origin_count, type_impl::get_type(), From 96ba143bbb645f3cd9ae55fd8b504a02baa72321 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 22 Oct 2021 12:57:51 +0200 Subject: [PATCH 44/59] adds partition class Co-authored-by: Tobias Ribizel --- core/CMakeLists.txt | 1 + core/device_hooks/common_kernels.inc.cpp | 27 +++ core/distributed/partition.cpp | 102 +++++++++ core/distributed/partition_kernels.hpp | 94 ++++++++ cuda/CMakeLists.txt | 1 + cuda/distributed/partition_kernels.cu | 78 +++++++ dpcpp/CMakeLists.txt | 1 + dpcpp/distributed/partition_kernels.dp.cpp | 78 +++++++ hip/CMakeLists.txt | 1 + hip/distributed/partition_kernels.hip.cpp | 78 +++++++ include/ginkgo/core/distributed/partition.hpp | 214 ++++++++++++++++++ include/ginkgo/ginkgo.hpp | 2 + omp/CMakeLists.txt | 1 + omp/distributed/partition_kernels.cpp | 154 +++++++++++++ omp/test/CMakeLists.txt | 1 + omp/test/distributed/CMakeLists.txt | 1 + omp/test/distributed/partition_kernels.cpp | 31 +++ reference/CMakeLists.txt | 1 + reference/distributed/partition_kernels.cpp | 119 ++++++++++ reference/test/CMakeLists.txt | 1 + reference/test/distributed/CMakeLists.txt | 1 + .../test/distributed/partition_kernels.cpp | 166 ++++++++++++++ 22 files changed, 1153 insertions(+) create mode 100644 core/distributed/partition.cpp create mode 100644 core/distributed/partition_kernels.hpp create mode 100644 cuda/distributed/partition_kernels.cu create mode 100644 dpcpp/distributed/partition_kernels.dp.cpp create mode 100644 hip/distributed/partition_kernels.hip.cpp create mode 100644 include/ginkgo/core/distributed/partition.hpp create mode 100644 omp/distributed/partition_kernels.cpp create mode 100644 omp/test/distributed/CMakeLists.txt create mode 100644 omp/test/distributed/partition_kernels.cpp create mode 100644 reference/distributed/partition_kernels.cpp create mode 100644 reference/test/distributed/CMakeLists.txt create mode 100644 reference/test/distributed/partition_kernels.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 68b116315a4..3115e7a928b 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(ginkgo base/mtx_io.cpp base/perturbation.cpp base/version.cpp + distributed/partition.cpp factorization/ic.cpp factorization/ilu.cpp factorization/par_ic.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index fa7221e2d09..0d17d72af51 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/precision_conversion_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/components/reduce_array_kernels.hpp" +#include "core/distributed/partition_kernels.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ic_kernels.hpp" #include "core/factorization/ilu_kernels.hpp" @@ -210,6 +211,32 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); } // namespace index_set +namespace partition { + + +GKO_PARTITION_COUNT_RANGES +GKO_NOT_COMPILED(GKO_HOOK_MODULE); + +template +GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); + +template +GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); + +template +GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); + + +} // namespace partition + + namespace dense { diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp new file mode 100644 index 00000000000..df2e0dff06e --- /dev/null +++ b/core/distributed/partition.cpp @@ -0,0 +1,102 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace distributed { +namespace partition { + + +GKO_REGISTER_OPERATION(count_ranges, partition::count_ranges); +GKO_REGISTER_OPERATION(build_from_mapping, partition::build_from_mapping); +GKO_REGISTER_OPERATION(build_from_contiguous, partition::build_from_contiguous); +GKO_REGISTER_OPERATION(build_ranks, partition::build_ranks); + + +} // namespace partition + + +template +std::unique_ptr> +Partition::build_from_mapping( + std::shared_ptr exec, const Array& mapping, + comm_index_type num_parts) +{ + auto local_mapping = make_temporary_clone(exec, &mapping); + size_type num_ranges{}; + exec->run(partition::make_count_ranges(*local_mapping.get(), num_ranges)); + auto result = Partition::create(exec, num_parts, num_ranges); + exec->run( + partition::make_build_from_mapping(*local_mapping.get(), result.get())); + result->compute_range_ranks(); + return result; +} + + +template +std::unique_ptr> +Partition::build_from_contiguous( + std::shared_ptr exec, + const Array& ranges) +{ + auto local_ranges = make_temporary_clone(exec, &ranges); + auto result = Partition::create( + exec, static_cast(ranges.get_num_elems() - 1), + ranges.get_num_elems() - 1); + exec->run(partition::make_build_from_contiguous(*local_ranges.get(), + result.get())); + result->compute_range_ranks(); + return result; +} + + +template +void Partition::compute_range_ranks() +{ + auto exec = offsets_.get_executor(); + exec->run(partition::make_build_ranks( + offsets_.get_const_data(), part_ids_.get_const_data(), get_num_ranges(), + get_num_parts(), ranks_.get_data(), part_sizes_.get_data())); +} + + +#define GKO_DECLARE_PARTITION(_type) class Partition<_type> +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION); + + +} // namespace distributed +} // namespace gko \ No newline at end of file diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp new file mode 100644 index 00000000000..a51d8021da3 --- /dev/null +++ b/core/distributed/partition_kernels.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ +#define GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ + + +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_PARTITION_COUNT_RANGES \ + void count_ranges(std::shared_ptr exec, \ + const Array& mapping, \ + size_type& num_ranges) + +#define GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType) \ + void build_from_contiguous( \ + std::shared_ptr exec, \ + const Array& ranges, \ + distributed::Partition* partition) + +#define GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType) \ + void build_from_mapping(std::shared_ptr exec, \ + const Array& mapping, \ + distributed::Partition* partition) + +#define GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) \ + void build_ranks(std::shared_ptr exec, \ + const global_index_type* range_offsets, \ + const int* range_parts, size_type num_ranges, \ + int num_parts, LocalIndexType* ranks, \ + LocalIndexType* sizes) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using global_index_type = distributed::global_index_type; \ + using comm_index_type = distributed::comm_index_type; \ + GKO_PARTITION_COUNT_RANGES; \ + template \ + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType); \ + template \ + GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ + template \ + GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index c5e8a7f44b9..fd1ed8ca3a4 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -75,6 +75,7 @@ target_sources(ginkgo_cuda base/version.cpp components/device_matrix_data_kernels.cu components/prefix_sum_kernels.cu + distributed/partition_kernels.cu factorization/factorization_kernels.cu factorization/ic_kernels.cu factorization/ilu_kernels.cu diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu new file mode 100644 index 00000000000..17fe9fe3e66 --- /dev/null +++ b/cuda/distributed/partition_kernels.cu @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, + size_type& num_ranges) GKO_NOT_IMPLEMENTED; + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + distributed::Partition* partition) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + distributed::Partition* partition) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); + + +template +void build_ranks(std::shared_ptr exec, + const global_index_type* range_offsets, const int* range_parts, + size_type num_ranges, int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); + + +} // namespace partition +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 8122aec30cf..f896ac396a8 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(ginkgo_dpcpp base/index_set_kernels.dp.cpp components/device_matrix_data_kernels.dp.cpp components/prefix_sum_kernels.dp.cpp + distributed/partition_kernels.dp.cpp factorization/ic_kernels.dp.cpp factorization/ilu_kernels.dp.cpp factorization/factorization_kernels.dp.cpp diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp new file mode 100644 index 00000000000..67a2de3e61b --- /dev/null +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "core/distributed/partition_kernels.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace dpcpp {
+namespace partition {
+
+
+void count_ranges(std::shared_ptr exec,
+                  const Array& mapping,
+                  size_type& num_ranges) GKO_NOT_IMPLEMENTED;
+
+
+template
+void build_from_contiguous(std::shared_ptr exec,
+                           const Array& ranges,
+                           distributed::Partition* partition)
+    GKO_NOT_IMPLEMENTED;
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+    GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS);
+
+
+template
+void build_from_mapping(std::shared_ptr exec,
+                        const Array& mapping,
+                        distributed::Partition* partition)
+    GKO_NOT_IMPLEMENTED;
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING);
+
+
+template
+void build_ranks(std::shared_ptr exec,
+                 const global_index_type* range_offsets, const int* range_parts,
+                 size_type num_ranges, int num_parts, LocalIndexType* ranks,
+                 LocalIndexType* sizes) GKO_NOT_IMPLEMENTED;
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
+
+
+} // namespace partition
+} // namespace dpcpp
+} // namespace kernels
+} // namespace gko
diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt
index 3124ab0775c..55b2c4605b8 100644
--- a/hip/CMakeLists.txt
+++ b/hip/CMakeLists.txt
@@ -160,6 +160,7 @@ set(GINKGO_HIP_SOURCES
     base/version.hip.cpp
     components/device_matrix_data_kernels.hip.cpp
     components/prefix_sum_kernels.hip.cpp
+    distributed/partition_kernels.hip.cpp
     factorization/factorization_kernels.hip.cpp
     factorization/ic_kernels.hip.cpp
     factorization/ilu_kernels.hip.cpp
diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp
new file mode 100644
index 00000000000..fc2319cb67b
--- /dev/null
+++ b/hip/distributed/partition_kernels.hip.cpp
@@ -0,0 +1,78 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3.
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, + size_type& num_ranges) GKO_NOT_IMPLEMENTED; + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + distributed::Partition* partition) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + distributed::Partition* partition) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); + + +template +void build_ranks(std::shared_ptr exec, + const global_index_type* range_offsets, const int* range_parts, + size_type num_ranges, int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); + + +} // namespace partition +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp new file mode 100644 index 00000000000..6586ec5841d --- /dev/null +++ b/include/ginkgo/core/distributed/partition.hpp @@ -0,0 +1,214 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_ + + +#include +#include + + +namespace gko { +namespace distributed { + + +using global_index_type = int64; +using comm_index_type = int; + + +/** + * Represents a partition of a range of indices [0, size) into a disjoint set of + * parts. The partition is stored as a set of consecutive ranges [begin, end) + * with an associated part ID and local index (number of indices in this part + * before `begin`). + * Global indices are stored as 64 bit signed integers (int64), part-local + * indices use LocalIndexType, Part IDs use 32 bit signed integers (int) + * + * @tparam LocalIndexType The index type used for part-local indices. + * To prevent overflows, no single part's size may + * exceed this index type's maximum value. + */ +template +class Partition : public EnablePolymorphicObject>, + public EnablePolymorphicAssignment>, + public EnableCreateMethod> { + friend class EnableCreateMethod>; + friend class EnablePolymorphicObject>; + static_assert(sizeof(global_index_type) >= sizeof(LocalIndexType), + "global_index_type must be at least as large as " + "LocalIndexType"); + +public: + using EnableCreateMethod>::create; + using EnablePolymorphicAssignment>::convert_to; + using EnablePolymorphicAssignment>::move_to; + + using local_index_type = LocalIndexType; + + /** + * Returns the total number of elements represented by this partition. + */ + size_type get_size() const + { + return offsets_.get_executor()->copy_val_to_host( + offsets_.get_const_data() + get_num_ranges()); + } + + /** + * Returns the number of ranges stored by this partition. + * This size refers to the data returned by get_range_bounds(). + */ + size_type get_num_ranges() const { return offsets_.get_num_elems() - 1; } + + /** + * Returns the number of parts represented in this partition. + */ + comm_index_type get_num_parts() const { return num_parts_; } + + /** + * Returns the ranges boundary array stored by this partition. + * `range_bounds[i]` is the beginning (inclusive) and + * `range_bounds[i + 1]` is the end (exclusive) of the ith range. + */ + const global_index_type* get_const_range_bounds() const + { + return offsets_.get_const_data(); + } + + /** + * @copydoc get_const_range_bounds() + */ + global_index_type* get_range_bounds() { return offsets_.get_data(); } + + /** + * Returns the part ID array stored by this partition. + * For each range from get_range_bounds(), it stores the part ID in the + * range [0, get_num_parts() - 1]. + */ + const comm_index_type* get_const_part_ids() const + { + return part_ids_.get_const_data(); + } + + /** + * @copydoc get_const_part_ids() + */ + comm_index_type* get_part_ids() { return part_ids_.get_data(); } + + /** + * Compute the range_ranks and part_sizes based on the current range_bounds + * and part_ids. 
+     */
+    void compute_range_ranks();
+
+    /**
+     * Returns the part-local base index for each range in this partition.
+     * `range_ranks[i]` is the part-local index of the first element of
+     * range i, i.e. the combined size of all preceding ranges belonging
+     * to the same part.
+     * These values can only be used after compute_range_ranks() was executed.
+     */
+    const local_index_type* get_range_ranks() const
+    {
+        return ranks_.get_const_data();
+    }
+
+    /**
+     * Returns the part size array.
+     * part_sizes[p] stores the number of elements in part `p`.
+     */
+    const local_index_type* get_part_sizes() const
+    {
+        return part_sizes_.get_const_data();
+    }
+
+    /**
+     * Returns the number of elements in the given part, i.e.
+     * `part_sizes[part]`.
+     */
+    local_index_type get_part_size(comm_index_type part) const
+    {
+        return this->get_executor()->copy_val_to_host(
+            part_sizes_.get_const_data() + part);
+    }
+
+    /**
+     * Builds a partition from a given mapping global_index -> part_id.
+     * @param exec the Executor on which the partition should be built
+     * @param mapping the mapping from global indices to part IDs.
+     * @param num_parts the number of parts used in the mapping.
+     * @return a Partition representing the given mapping as a set of ranges
+     */
+    static std::unique_ptr build_from_mapping(
+        std::shared_ptr exec,
+        const Array& mapping, comm_index_type num_parts);
+
+    /**
+     * Builds a partition consisting of contiguous ranges, one for each part.
+     * @param exec the Executor on which the partition should be built
+     * @param ranges the boundaries of the ranges representing each part.
+     *               Part i contains the indices [ranges[i], ranges[i + 1]).
+     * @return a Partition representing the given contiguous partitioning.
+     */
+    static std::unique_ptr build_from_contiguous(
+        std::shared_ptr exec,
+        const Array& ranges);
+
+    /**
+     * Creates a partition stored on the given executor with the given number
+     * of consecutive ranges and parts.
+     */
+    Partition(std::shared_ptr exec,
+              comm_index_type num_parts = 0, size_type num_ranges = 0)
+        : EnablePolymorphicObject{exec},
+          num_parts_{num_parts},
+          offsets_{exec, num_ranges + 1},
+          ranks_{exec, num_ranges},
+          part_sizes_{exec, static_cast(num_parts)},
+          part_ids_{exec, num_ranges}
+    {
+        // TODO zero out contents
+    }
+
+private:
+    comm_index_type num_parts_;
+    Array offsets_;
+    Array ranks_;
+    Array part_sizes_;
+    Array part_ids_;
+};
+
+
+} // namespace distributed
+} // namespace gko
+
+
+#endif // GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_
diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp
index b4014ff6ccf..5c154644773 100644
--- a/include/ginkgo/ginkgo.hpp
+++ b/include/ginkgo/ginkgo.hpp
@@ -69,6 +69,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
 #include
+#include
+
 #include
 #include
 #include
diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt
index 9492915892d..79b29e053c3 100644
--- a/omp/CMakeLists.txt
+++ b/omp/CMakeLists.txt
@@ -7,6 +7,7 @@ target_sources(ginkgo_omp
     base/version.cpp
     components/device_matrix_data_kernels.cpp
     components/prefix_sum_kernels.cpp
+    distributed/partition_kernels.cpp
     factorization/factorization_kernels.cpp
     factorization/ic_kernels.cpp
     factorization/ilu_kernels.cpp
diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp
new file mode 100644
index 00000000000..57c9c77d239
--- /dev/null
+++ b/omp/distributed/partition_kernels.cpp
@@ -0,0 +1,154 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include + + +#include "core/base/allocator.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, size_type& num_ranges) +{ + num_ranges = 0; + auto mapping_data = mapping.get_const_data(); +#pragma omp parallel for reduction(+ : num_ranges) + for (size_type i = 0; i < mapping.get_num_elems(); i++) { + auto cur_part = mapping_data[i]; + auto prev_part = i == 0 ? 
comm_index_type{-1} : mapping_data[i - 1];
+        num_ranges += cur_part != prev_part;
+    }
+}
+
+
+template
+void build_from_contiguous(std::shared_ptr exec,
+                           const Array& ranges,
+                           distributed::Partition* partition)
+{
+    partition->get_range_bounds()[0] = 0;
+#pragma omp parallel for
+    for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) {
+        auto begin = ranges.get_const_data()[i];
+        auto end = ranges.get_const_data()[i + 1];
+        partition->get_range_bounds()[i + 1] = end;
+        partition->get_part_ids()[i] = i;
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+    GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS);
+
+
+template
+void build_from_mapping(std::shared_ptr exec,
+                        const Array& mapping,
+                        distributed::Partition* partition)
+{
+    size_type range_idx{};
+    comm_index_type range_part{-1};
+    for (size_type i = 0; i < mapping.get_num_elems(); i++) {
+        auto cur_part = mapping.get_const_data()[i];
+        if (cur_part != range_part) {
+            partition->get_range_bounds()[range_idx] = i;
+            partition->get_part_ids()[range_idx] = cur_part;
+            range_idx++;
+            range_part = cur_part;
+        }
+    }
+    partition->get_range_bounds()[range_idx] =
+        static_cast(mapping.get_num_elems());
+}
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING);
+
+
+template
+void build_ranks(std::shared_ptr exec,
+                 const global_index_type* range_offsets, const int* range_parts,
+                 size_type num_ranges, int num_parts, LocalIndexType* ranks,
+                 LocalIndexType* sizes)
+{
+    std::fill_n(sizes, num_parts, 0);
+    auto num_threads = static_cast(omp_get_max_threads());
+    auto size_per_thread = (num_ranges + num_threads - 1) / num_threads;
+    vector local_sizes(num_parts * num_threads, 0, {exec});
+#pragma omp parallel
+    {
+        auto thread_id = static_cast(omp_get_thread_num());
+        auto thread_begin = size_per_thread * thread_id;
+        auto thread_end = std::min(num_ranges, thread_begin + size_per_thread);
+        auto base = num_parts * thread_id;
+        // local exclusive prefix sum
+        for (auto range = thread_begin; range < thread_end; range++) {
+            auto begin = range_offsets[range];
+            auto end = range_offsets[range + 1];
+            auto part = range_parts[range];
+            ranks[range] = local_sizes[part + base];
+            local_sizes[part + base] += end - begin;
+        }
+        // all thread-local prefix sums must be complete before they are
+        // combined; a worksharing `omp for` has no implied barrier on entry
+#pragma omp barrier
+        // exclusive prefix sum over local sizes
+#pragma omp for
+        for (comm_index_type part = 0; part < num_parts; ++part) {
+            LocalIndexType size{};
+            for (size_type thread = 0; thread < num_threads; ++thread) {
+                auto idx = num_parts * thread + part;
+                auto local_size = local_sizes[idx];
+                local_sizes[idx] = size;
+                size += local_size;
+            }
+            sizes[part] = size;
+        }
+        // add global baselines to local ranks
+        for (auto range = thread_begin; range < thread_end; range++) {
+            auto part = range_parts[range];
+            ranks[range] += local_sizes[part + base];
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
+
+
+} // namespace partition
+} // namespace omp
+} // namespace kernels
+} // namespace gko
diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt
index 35590c60df1..30d801a186e 100644
--- a/omp/test/CMakeLists.txt
+++ b/omp/test/CMakeLists.txt
@@ -2,6 +2,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake)
 add_subdirectory(base)
 add_subdirectory(components)
+add_subdirectory(distributed)
 add_subdirectory(factorization)
 add_subdirectory(matrix)
 add_subdirectory(multigrid)
diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt
new file mode 100644
index 00000000000..78a626512af
--- /dev/null
+++ b/omp/test/distributed/CMakeLists.txt
@@
-0,0 +1 @@ +ginkgo_create_test(partition_kernels) diff --git a/omp/test/distributed/partition_kernels.cpp b/omp/test/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..fa0beceb70a --- /dev/null +++ b/omp/test/distributed/partition_kernels.cpp @@ -0,0 +1,31 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 11787edc228..83f770bc941 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -9,6 +9,7 @@ target_sources(ginkgo_reference components/reduce_array_kernels.cpp components/precision_conversion_kernels.cpp components/prefix_sum_kernels.cpp + distributed/partition_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp factorization/ilu_kernels.cpp diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..c74395a614d --- /dev/null +++ b/reference/distributed/partition_kernels.cpp @@ -0,0 +1,119 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, size_type& num_ranges) +{ + num_ranges = 0; + comm_index_type prev_part{-1}; + for (size_type i = 0; i < mapping.get_num_elems(); i++) { + auto cur_part = mapping.get_const_data()[i]; + num_ranges += cur_part != prev_part; + prev_part = cur_part; + } +} + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + distributed::Partition* partition) +{ + partition->get_range_bounds()[0] = 0; + for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) { + auto begin = ranges.get_const_data()[i]; + auto end = ranges.get_const_data()[i + 1]; + partition->get_range_bounds()[i + 1] = end; + partition->get_part_ids()[i] = i; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + distributed::Partition* partition) +{ + size_type range_idx{}; + comm_index_type range_part{-1}; + for (size_type i = 0; i < mapping.get_num_elems(); i++) { + auto cur_part = mapping.get_const_data()[i]; + if (cur_part != range_part) { + partition->get_range_bounds()[range_idx] = i; + partition->get_part_ids()[range_idx] = cur_part; + range_idx++; + range_part = cur_part; + } + } + partition->get_range_bounds()[range_idx] = + static_cast(mapping.get_num_elems()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); + + +template +void build_ranks(std::shared_ptr exec, + const global_index_type* range_offsets, const int* range_parts, + size_type num_ranges, int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) +{ + std::fill_n(sizes, num_parts, 0); + for (size_type range = 0; range < num_ranges; ++range) { + auto begin = range_offsets[range]; + auto end = range_offsets[range + 1]; + auto part = range_parts[range]; + auto rank = sizes[part]; + ranks[range] = rank; + sizes[part] += end - begin; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); + + +} // namespace partition +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/CMakeLists.txt b/reference/test/CMakeLists.txt index e145507be4a..b7cb46408b5 100644 --- a/reference/test/CMakeLists.txt +++ b/reference/test/CMakeLists.txt @@ -2,6 +2,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) +add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(log) add_subdirectory(matrix) diff --git 
a/reference/test/distributed/CMakeLists.txt b/reference/test/distributed/CMakeLists.txt new file mode 100644 index 00000000000..78a626512af --- /dev/null +++ b/reference/test/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(partition_kernels) diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..260b3e47390 --- /dev/null +++ b/reference/test/distributed/partition_kernels.cpp @@ -0,0 +1,166 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using global_index_type = gko::distributed::global_index_type; +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Partition : public ::testing::Test { +protected: + using local_index_type = LocalIndexType; + Partition() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); + + +TYPED_TEST(Partition, BuildsFromMapping) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array mapping{ + this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; + comm_index_type num_parts = 3; + gko::size_type num_ranges = 10; + + auto partition = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + + EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); + EXPECT_EQ(partition->get_num_ranges(), num_ranges); + EXPECT_EQ(partition->get_num_parts(), num_parts); + EXPECT_EQ(partition->get_const_range_bounds(), + partition->get_range_bounds()); + EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 2); + EXPECT_EQ(partition->get_const_range_bounds()[2], 3); + EXPECT_EQ(partition->get_const_range_bounds()[3], 5); + EXPECT_EQ(partition->get_const_range_bounds()[4], 6); + EXPECT_EQ(partition->get_const_range_bounds()[5], 8); + EXPECT_EQ(partition->get_const_range_bounds()[6], 9); + EXPECT_EQ(partition->get_const_range_bounds()[7], 10); + EXPECT_EQ(partition->get_const_range_bounds()[8], 13); + EXPECT_EQ(partition->get_const_range_bounds()[9], 15); + EXPECT_EQ(partition->get_const_range_bounds()[10], 16); + EXPECT_EQ(partition->get_part_ids()[0], 2); + EXPECT_EQ(partition->get_part_ids()[1], 0); + EXPECT_EQ(partition->get_part_ids()[2], 1); + EXPECT_EQ(partition->get_part_ids()[3], 2); + EXPECT_EQ(partition->get_part_ids()[4], 0); + EXPECT_EQ(partition->get_part_ids()[5], 1); + EXPECT_EQ(partition->get_part_ids()[6], 0); + EXPECT_EQ(partition->get_part_ids()[7], 1); + EXPECT_EQ(partition->get_part_ids()[8], 2); + EXPECT_EQ(partition->get_part_ids()[9], 0); + EXPECT_EQ(partition->get_range_ranks()[0], 0); + EXPECT_EQ(partition->get_range_ranks()[1], 0); + EXPECT_EQ(partition->get_range_ranks()[2], 0); + EXPECT_EQ(partition->get_range_ranks()[3], 2); + EXPECT_EQ(partition->get_range_ranks()[4], 1); + EXPECT_EQ(partition->get_range_ranks()[5], 2); + EXPECT_EQ(partition->get_range_ranks()[6], 3); + EXPECT_EQ(partition->get_range_ranks()[7], 3); + EXPECT_EQ(partition->get_range_ranks()[8], 3); + EXPECT_EQ(partition->get_range_ranks()[9], 4); + EXPECT_EQ(partition->get_part_sizes()[0], 5); + EXPECT_EQ(partition->get_part_sizes()[1], 6); + EXPECT_EQ(partition->get_part_sizes()[2], 5); +} + + +TYPED_TEST(Partition, BuildsFromRanges) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; + + auto partition = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + + EXPECT_EQ(partition->get_size(), + ranges.get_const_data()[ranges.get_num_elems() - 1]); + EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); + 
EXPECT_EQ(partition->get_const_range_bounds(), + partition->get_range_bounds()); + EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 5); + EXPECT_EQ(partition->get_const_range_bounds()[2], 5); + EXPECT_EQ(partition->get_const_range_bounds()[3], 7); + EXPECT_EQ(partition->get_const_range_bounds()[4], 9); + EXPECT_EQ(partition->get_const_range_bounds()[5], 10); + EXPECT_EQ(partition->get_part_ids()[0], 0); + EXPECT_EQ(partition->get_part_ids()[1], 1); + EXPECT_EQ(partition->get_part_ids()[2], 2); + EXPECT_EQ(partition->get_part_ids()[3], 3); + EXPECT_EQ(partition->get_part_ids()[4], 4); + EXPECT_EQ(partition->get_range_ranks()[0], 0); + EXPECT_EQ(partition->get_range_ranks()[1], 0); + EXPECT_EQ(partition->get_range_ranks()[2], 0); + EXPECT_EQ(partition->get_range_ranks()[3], 0); + EXPECT_EQ(partition->get_range_ranks()[4], 0); + EXPECT_EQ(partition->get_part_sizes()[0], 5); + EXPECT_EQ(partition->get_part_sizes()[1], 0); + EXPECT_EQ(partition->get_part_sizes()[2], 2); + EXPECT_EQ(partition->get_part_sizes()[3], 2); + EXPECT_EQ(partition->get_part_sizes()[4], 1); +} + + +} // namespace From 83b9071bc4e7454f7d8937e33a2ac573a1afc908 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 24 Aug 2021 16:40:04 +0200 Subject: [PATCH 45/59] add sorting tests to partition --- core/device_hooks/common_kernels.inc.cpp | 5 ++ core/distributed/partition.cpp | 24 +++++++- core/distributed/partition_kernels.hpp | 10 +++- cuda/distributed/partition_kernels.cu | 7 +++ dpcpp/distributed/partition_kernels.dp.cpp | 6 ++ hip/distributed/partition_kernels.hip.cpp | 7 +++ include/ginkgo/core/distributed/partition.hpp | 13 ++++ omp/distributed/partition_kernels.cpp | 6 ++ reference/distributed/partition_kernels.cpp | 17 ++++++ .../test/distributed/partition_kernels.cpp | 60 +++++++++++++++++++ 10 files changed, 152 insertions(+), 3 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 0d17d72af51..6ff9896e06a 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -233,6 +233,11 @@ GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +template +GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + } // namespace partition diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index df2e0dff06e..e7d9e772340 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -45,6 +45,7 @@ GKO_REGISTER_OPERATION(count_ranges, partition::count_ranges); GKO_REGISTER_OPERATION(build_from_mapping, partition::build_from_mapping); GKO_REGISTER_OPERATION(build_from_contiguous, partition::build_from_contiguous); GKO_REGISTER_OPERATION(build_ranks, partition::build_ranks); +GKO_REGISTER_OPERATION(is_ordered, partition::is_ordered); } // namespace partition @@ -94,9 +95,30 @@ void Partition::compute_range_ranks() } +template +bool Partition::is_connected() +{ + return get_num_parts() == get_num_ranges(); +} + + +template +bool Partition::is_ordered() +{ + if (is_connected()) { + auto exec = this->get_executor(); + bool is_ordered; + exec->run(partition::make_is_ordered(this, &is_ordered)); + return is_ordered; + } else { + return false; + } +} + + 
#define GKO_DECLARE_PARTITION(_type) class Partition<_type> GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION); } // namespace distributed -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index a51d8021da3..d00ffd0c950 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -67,6 +67,11 @@ namespace kernels { int num_parts, LocalIndexType* ranks, \ LocalIndexType* sizes) +#define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ + void is_ordered(std::shared_ptr exec, \ + const distributed::Partition *partition, \ + bool *result) + #define GKO_DECLARE_ALL_AS_TEMPLATES \ using global_index_type = distributed::global_index_type; \ @@ -77,8 +82,9 @@ namespace kernels { template \ GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ template \ - GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) - + GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType); \ + template \ + GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition, GKO_DECLARE_ALL_AS_TEMPLATES); diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index 17fe9fe3e66..24f6a669f5a 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -72,6 +72,13 @@ void build_ranks(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +template +void is_ordered(std::shared_ptr exec, + const distributed::Partition *partition, + bool *result) GKO_NOT_IMPLEMENTED; +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + + } // namespace partition } // namespace cuda } // namespace kernels diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 67a2de3e61b..313cfa58e68 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -72,6 +72,12 @@ void build_ranks(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +template +void is_ordered(std::shared_ptr exec, + const distributed::Partition *partition, + bool *result) GKO_NOT_IMPLEMENTED; +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + } // namespace partition } // namespace dpcpp } // namespace kernels diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index fc2319cb67b..48de990469d 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -72,6 +72,13 @@ void build_ranks(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +template +void is_ordered(std::shared_ptr exec, + const distributed::Partition *partition, + bool *result) GKO_NOT_IMPLEMENTED; +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + + } // namespace partition } // namespace hip } // namespace kernels diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 6586ec5841d..3f10eb3cc5b 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -160,6 +160,19 @@ class Partition : public EnablePolymorphicObject>, part_sizes_.get_const_data() + part); } + /** + * Checks if each part is associated with a contiguous range. 
+     */
+    bool is_connected();
+
+    /**
+     * Checks if the ranges are ordered by their part index.
+     *
+     * Implies that the partition is connected.
+     */
+    bool is_ordered();
+
+
     /**
      * Builds a partition from a given mapping global_index -> part_id.
      * @param exec  the Executor on which the partition should be built
diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp
index 57c9c77d239..defd4f702c7 100644
--- a/omp/distributed/partition_kernels.cpp
+++ b/omp/distributed/partition_kernels.cpp
@@ -148,6 +148,12 @@ void build_ranks(std::shared_ptr exec,
 GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
 
+template
+void is_ordered(std::shared_ptr exec,
+                const distributed::Partition *partition,
+                bool *result) GKO_NOT_IMPLEMENTED;
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED);
+
 } // namespace partition
 } // namespace omp
 } // namespace kernels
diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp
index c74395a614d..faa339f794e 100644
--- a/reference/distributed/partition_kernels.cpp
+++ b/reference/distributed/partition_kernels.cpp
@@ -112,6 +112,23 @@ void build_ranks(std::shared_ptr exec,
 GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
 
+template
+void is_ordered(std::shared_ptr exec,
+                const distributed::Partition *partition,
+                bool *result)
+{
+    *result = true;
+    auto part_ids = partition->get_const_part_ids();
+
+    for (comm_index_type i = 1; i < partition->get_num_ranges(); ++i) {
+        if (part_ids[i] < part_ids[i - 1]) {
+            *result = false;
+            return;
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED);
 
 } // namespace partition
 } // namespace reference
diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp
index 260b3e47390..bdc9f5ecb18 100644
--- a/reference/test/distributed/partition_kernels.cpp
+++ b/reference/test/distributed/partition_kernels.cpp
@@ -163,4 +163,64 @@ TYPED_TEST(Partition, BuildsFromRanges)
 }
 
 
+TYPED_TEST(Partition, IsConnected)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsConnectedUnordered)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsConnectedFail)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}},
+            3));
+
+    ASSERT_FALSE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsOrdered)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_ordered());
+}
+
+
+TYPED_TEST(Partition, IsOrderedFail)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}},
+            3));
+
+    ASSERT_FALSE(part->is_ordered());
+}
+
+
 } // namespace
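Taken together, the new tests pin down that connectivity is invariant under
relabeling the parts, while orderedness is not. A compact summary of the
mappings they use (a sketch, not additional test code):

    // {0, 0, 1, 1, 2} -> 3 ranges / 3 parts, part ids 0,1,2: connected, ordered
    // {1, 1, 0, 0, 2} -> 3 ranges / 3 parts, part ids 1,0,2: connected, NOT ordered
    // {0, 1, 2, 0, 1} -> 5 ranges / 3 parts (parts 0 and 1 are split):
    //                    NOT connected, hence NOT ordered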
From a193c907d8b418a2bf9d649efd824c59d4d1066d Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 7 Sep 2021 12:08:59 +0000 Subject: [PATCH 46/59] Format files Co-authored-by: Marcel Koch --- core/distributed/partition_kernels.hpp | 4 ++-- cuda/distributed/partition_kernels.cu | 4 ++-- dpcpp/distributed/partition_kernels.dp.cpp | 4 ++-- hip/distributed/partition_kernels.hip.cpp | 4 ++-- omp/distributed/partition_kernels.cpp | 4 ++-- omp/test/distributed/partition_kernels.cpp | 1 + reference/distributed/partition_kernels.cpp | 4 ++-- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index d00ffd0c950..32df9a79dc5 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -69,8 +69,8 @@ namespace kernels { #define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ void is_ordered(std::shared_ptr exec, \ - const distributed::Partition *partition, \ - bool *result) + const distributed::Partition* partition, \ + bool* result) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index 24f6a669f5a..bea089e2f7a 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -74,8 +74,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); template void is_ordered(std::shared_ptr exec, - const distributed::Partition *partition, - bool *result) GKO_NOT_IMPLEMENTED; + const distributed::Partition* partition, + bool* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 313cfa58e68..52239ddfcf8 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -74,8 +74,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); template void is_ordered(std::shared_ptr exec, - const distributed::Partition *partition, - bool *result) GKO_NOT_IMPLEMENTED; + const distributed::Partition* partition, + bool* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index 48de990469d..d83f4fdb159 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -74,8 +74,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); template void is_ordered(std::shared_ptr exec, - const distributed::Partition *partition, - bool *result) GKO_NOT_IMPLEMENTED; + const distributed::Partition* partition, + bool* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index defd4f702c7..43c582ef1c0 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -150,8 +150,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); template void is_ordered(std::shared_ptr exec, - const distributed::Partition *partition, - bool *result) GKO_NOT_IMPLEMENTED; + const distributed::Partition* partition, + bool* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition diff --git 
a/omp/test/distributed/partition_kernels.cpp b/omp/test/distributed/partition_kernels.cpp index fa0beceb70a..f4b950da788 100644 --- a/omp/test/distributed/partition_kernels.cpp +++ b/omp/test/distributed/partition_kernels.cpp @@ -29,3 +29,4 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ + diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index faa339f794e..28357f64179 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -114,8 +114,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); template void is_ordered(std::shared_ptr exec, - const distributed::Partition *partition, - bool *result) + const distributed::Partition* partition, + bool* result) { *result = true; auto part_ids = partition->get_const_part_ids(); From 06404d2c6297bd9951e5c2b1356ed1402740cf8a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 23 Oct 2021 18:16:57 +0200 Subject: [PATCH 47/59] add partition kernels --- common/CMakeLists.txt | 1 + .../unified/distributed/partition_kernels.cpp | 128 +++++++ cuda/distributed/partition_kernels.cu | 93 ++++-- dpcpp/distributed/partition_kernels.dp.cpp | 77 +++-- hip/distributed/partition_kernels.hip.cpp | 93 ++++-- include/ginkgo/core/base/array.hpp | 19 ++ omp/distributed/partition_kernels.cpp | 57 +--- omp/test/distributed/partition_kernels.cpp | 194 +++++++++++ test/CMakeLists.txt | 3 +- test/distributed/CMakeLists.txt | 1 + test/distributed/partition_kernels.cpp | 314 ++++++++++++++++++ 11 files changed, 848 insertions(+), 132 deletions(-) create mode 100644 common/unified/distributed/partition_kernels.cpp create mode 100644 test/distributed/CMakeLists.txt create mode 100644 test/distributed/partition_kernels.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 26115b184c0..cafffb708f6 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -5,6 +5,7 @@ set(UNIFIED_SOURCES components/fill_array_kernels.cpp components/precision_conversion_kernels.cpp components/reduce_array_kernels.cpp + distributed/partition_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..52e2b8d153e --- /dev/null +++ b/common/unified/distributed/partition_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include "common/unified/base/kernel_launch.hpp" +#include "common/unified/base/kernel_launch_reduction.hpp" +#include "core/components/prefix_sum_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, size_type& num_ranges) +{ + Array result{exec, 1}; + run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, auto mapping) { + auto cur_part = mapping[i]; + auto prev_part = i == 0 ? comm_index_type{-1} : mapping[i - 1]; + return cur_part != prev_part ? 1 : 0; + }, + [] GKO_KERNEL(auto a, auto b) { return a + b; }, + [] GKO_KERNEL(auto a) { return a; }, size_type{}, result.get_data(), + mapping.get_num_elems(), mapping); + num_ranges = exec->copy_val_to_host(result.get_const_data()); +} + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + distributed::Partition* partition) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto ranges, auto bounds, auto ids) { + if (i == 0) { + bounds[0] = 0; + } + bounds[i + 1] = ranges[i + 1]; + ids[i] = i; + }, + ranges.get_num_elems() - 1, ranges, partition->get_range_bounds(), + partition->get_part_ids()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + distributed::Partition* partition) +{ + Array range_index_ranks{exec, mapping.get_num_elems() + 1}; + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto mapping, auto output) { + const auto prev_part = i > 0 ? mapping[i - 1] : comm_index_type{-1}; + const auto cur_part = mapping[i]; + output[i] = cur_part != prev_part ? 1 : 0; + }, + mapping.get_num_elems(), mapping, range_index_ranks); + components::prefix_sum(exec, range_index_ranks.get_data(), + mapping.get_num_elems() + 1); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto size, auto mapping, auto prefix_sum, + auto ranges, auto range_parts) { + const auto prev_part = i > 0 ? mapping[i - 1] : comm_index_type{-1}; + const auto cur_part = i < size ? 
mapping[i] : comm_index_type{-1}; + if (cur_part != prev_part) { + auto out_idx = prefix_sum[i]; + ranges[out_idx] = i; + if (i < size) { + range_parts[out_idx] = cur_part; + } + } + }, + mapping.get_num_elems() + 1, mapping.get_num_elems(), mapping, + range_index_ranks, partition->get_range_bounds(), + partition->get_part_ids()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); + + +} // namespace partition +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index bea089e2f7a..d5be073f182 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -33,41 +33,84 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/distributed/partition_kernels.hpp" -namespace gko { -namespace kernels { -namespace cuda { -namespace partition { - - -void count_ranges(std::shared_ptr exec, - const Array& mapping, - size_type& num_ranges) GKO_NOT_IMPLEMENTED; - +#include +#include +#include +#include +#include -template -void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - distributed::Partition* partition) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); +#include "common/unified/base/kernel_launch.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" -template -void build_from_mapping(std::shared_ptr exec, - const Array& mapping, - distributed::Partition* partition) - GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +namespace gko { +namespace kernels { +namespace cuda { +namespace partition { template void build_ranks(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) GKO_NOT_IMPLEMENTED; + LocalIndexType* sizes) +{ + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set sizes to 0 in case of empty parts + components::fill_array(exec, sizes, num_parts, LocalIndexType{}); + + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto tmp_part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + tmp_part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + tmp_part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); + + auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); + auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); + auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); + auto value_it = thrust::make_zip_iterator( + thrust::make_tuple(range_sizes_ptr, permutation_ptr)); + // group sizes by part ID + thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, value_it); + // compute inclusive prefix sum for each part + thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, range_sizes_ptr, + range_sizes_ptr); + // write back the results + run_kernel( + exec, + [] 
GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, + auto orig_idxs, auto ranks, auto sizes) { + auto prev_part = + i > 0 ? grouped_part_ids[i - 1] : comm_index_type{-1}; + auto cur_part = grouped_part_ids[i]; + auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel + if (cur_part != next_part) { + sizes[cur_part] = grouped_range_ranks[i]; + } + // write result shifted by one entry to get exclusive prefix sum + ranks[orig_idxs[i]] = prev_part == cur_part + ? grouped_range_ranks[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 52239ddfcf8..114f524ba64 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -39,35 +39,62 @@ namespace dpcpp { namespace partition { -void count_ranges(std::shared_ptr exec, - const Array& mapping, - size_type& num_ranges) GKO_NOT_IMPLEMENTED; - - -template -void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - distributed::Partition& partition) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); - - -template -void build_from_mapping(std::shared_ptr exec, - const Array& mapping, - distributed::Partition& partition) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); - - template void build_ranks(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) GKO_NOT_IMPLEMENTED; + LocalIndexType* sizes) +{ + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set sizes to 0 in case of empty parts + components::fill_array(exec, sizes, num_parts, LocalIndexType{}); + + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto tmp_part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + tmp_part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + tmp_part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); + + // group sizes by part ID + // TODO oneDPL has stable_sort and views::zip + // compute inclusive prefix sum for each part + // TODO compute "row_ptrs" for tmp_part_ids + // TODO compute prefix_sum over range_sizes + // TODO compute adjacent differences, set -part_size at part boundaries + // TODO compute prefix_sum again + // write back the results + // TODO this needs to be adapted to the output of the algorithm above + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, + auto orig_idxs, auto ranks, auto sizes) { + auto prev_part = + i > 0 ? grouped_part_ids[i - 1] : comm_index_type{-1}; + auto cur_part = grouped_part_ids[i]; + auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel + if (cur_part != next_part) { + sizes[cur_part] = grouped_range_ranks[i]; + } + // write result shifted by one entry to get exclusive prefix sum + ranks[orig_idxs[i]] = prev_part == cur_part + ? 
grouped_range_ranks[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index d83f4fdb159..f49e097d979 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -33,41 +33,84 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/distributed/partition_kernels.hpp" -namespace gko { -namespace kernels { -namespace hip { -namespace partition { - - -void count_ranges(std::shared_ptr exec, - const Array& mapping, - size_type& num_ranges) GKO_NOT_IMPLEMENTED; - +#include +#include +#include +#include +#include -template -void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - distributed::Partition* partition) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); +#include "common/unified/base/kernel_launch.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" -template -void build_from_mapping(std::shared_ptr exec, - const Array& mapping, - distributed::Partition* partition) - GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +namespace gko { +namespace kernels { +namespace hip { +namespace partition { template void build_ranks(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) GKO_NOT_IMPLEMENTED; + LocalIndexType* sizes) +{ + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set sizes to 0 in case of empty parts + components::fill_array(exec, sizes, num_parts, LocalIndexType{}); + + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto tmp_part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + tmp_part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + tmp_part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); + + auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); + auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); + auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); + auto value_it = thrust::make_zip_iterator( + thrust::make_tuple(range_sizes_ptr, permutation_ptr)); + // group sizes by part ID + thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, value_it); + // compute inclusive prefix sum for each part + thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, range_sizes_ptr, + range_sizes_ptr); + // write back the results + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, + auto orig_idxs, auto ranks, auto sizes) { + auto prev_part = + i > 0 ? 
grouped_part_ids[i - 1] : comm_index_type{-1}; + auto cur_part = grouped_part_ids[i]; + auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel + if (cur_part != next_part) { + sizes[cur_part] = grouped_range_ranks[i]; + } + // write result shifted by one entry to get exclusive prefix sum + ranks[orig_idxs[i]] = prev_part == cur_part + ? grouped_range_ranks[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index bd3180cca85..e714697cc40 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -725,6 +725,25 @@ template void reduce_add(const Array& input_arr, Array& result); +/** + * Helper function to create an array view deducing the value type. + * + * @param exec the executor on which the array resides + * @param size the number of elements for the array + * @param data the pointer to the array we create a view on. + * + * @tparam ValueType the type of the array elements + * + * @return `Array::view(exec, size, data)` + */ +template +Array make_array_view(std::shared_ptr exec, + size_type size, ValueType* data) +{ + return Array::view(exec, size, data); +} + + namespace detail { diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index 43c582ef1c0..e4a8d3bdf97 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -45,62 +45,6 @@ namespace omp { namespace partition { -void count_ranges(std::shared_ptr exec, - const Array& mapping, size_type& num_ranges) -{ - num_ranges = 0; - auto mapping_data = mapping.get_const_data(); -#pragma omp parallel for reduction(+ : num_ranges) - for (size_type i = 0; i < mapping.get_num_elems(); i++) { - auto cur_part = mapping_data[i]; - auto prev_part = i == 0 ? 
comm_index_type{-1} : mapping_data[i - 1]; - num_ranges += cur_part != prev_part; - } -} - - -template -void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - distributed::Partition* partition) -{ - partition->get_range_bounds()[0] = 0; -#pragma omp parallel for - for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) { - auto begin = ranges.get_const_data()[i]; - auto end = ranges.get_const_data()[i + 1]; - partition->get_range_bounds()[i + 1] = end; - partition->get_part_ids()[i] = i; - } -} - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); - - -template -void build_from_mapping(std::shared_ptr exec, - const Array& mapping, - distributed::Partition* partition) -{ - size_type range_idx{}; - comm_index_type range_part{-1}; - for (size_type i = 0; i < mapping.get_num_elems(); i++) { - auto cur_part = mapping.get_const_data()[i]; - if (cur_part != range_part) { - partition->get_range_bounds()[range_idx] = i; - partition->get_part_ids()[range_idx] = cur_part; - range_idx++; - range_part = cur_part; - } - } - partition->get_range_bounds()[range_idx] = - static_cast(mapping.get_num_elems()); -} - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); - - template void build_ranks(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, @@ -125,6 +69,7 @@ void build_ranks(std::shared_ptr exec, ranks[range] = local_sizes[part + base]; local_sizes[part + base] += end - begin; } +#pragma omp barrier // exclusive prefix sum over local sizes #pragma omp for for (comm_index_type part = 0; part < num_parts; ++part) { diff --git a/omp/test/distributed/partition_kernels.cpp b/omp/test/distributed/partition_kernels.cpp index f4b950da788..bdc9f5ecb18 100644 --- a/omp/test/distributed/partition_kernels.cpp +++ b/omp/test/distributed/partition_kernels.cpp @@ -30,3 +30,197 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using global_index_type = gko::distributed::global_index_type; +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Partition : public ::testing::Test { +protected: + using local_index_type = LocalIndexType; + Partition() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); + + +TYPED_TEST(Partition, BuildsFromMapping) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array mapping{ + this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; + comm_index_type num_parts = 3; + gko::size_type num_ranges = 10; + + auto partition = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + + EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); + EXPECT_EQ(partition->get_num_ranges(), num_ranges); + EXPECT_EQ(partition->get_num_parts(), num_parts); + EXPECT_EQ(partition->get_const_range_bounds(), + partition->get_range_bounds()); + EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 2); + EXPECT_EQ(partition->get_const_range_bounds()[2], 3); + EXPECT_EQ(partition->get_const_range_bounds()[3], 5); + EXPECT_EQ(partition->get_const_range_bounds()[4], 6); + EXPECT_EQ(partition->get_const_range_bounds()[5], 8); + EXPECT_EQ(partition->get_const_range_bounds()[6], 9); + EXPECT_EQ(partition->get_const_range_bounds()[7], 10); + EXPECT_EQ(partition->get_const_range_bounds()[8], 13); + EXPECT_EQ(partition->get_const_range_bounds()[9], 15); + EXPECT_EQ(partition->get_const_range_bounds()[10], 16); + EXPECT_EQ(partition->get_part_ids()[0], 2); + EXPECT_EQ(partition->get_part_ids()[1], 0); + EXPECT_EQ(partition->get_part_ids()[2], 1); + EXPECT_EQ(partition->get_part_ids()[3], 2); + EXPECT_EQ(partition->get_part_ids()[4], 0); + EXPECT_EQ(partition->get_part_ids()[5], 1); + EXPECT_EQ(partition->get_part_ids()[6], 0); + EXPECT_EQ(partition->get_part_ids()[7], 1); + EXPECT_EQ(partition->get_part_ids()[8], 2); + EXPECT_EQ(partition->get_part_ids()[9], 0); + EXPECT_EQ(partition->get_range_ranks()[0], 0); + EXPECT_EQ(partition->get_range_ranks()[1], 0); + EXPECT_EQ(partition->get_range_ranks()[2], 0); + EXPECT_EQ(partition->get_range_ranks()[3], 2); + EXPECT_EQ(partition->get_range_ranks()[4], 1); + EXPECT_EQ(partition->get_range_ranks()[5], 2); + EXPECT_EQ(partition->get_range_ranks()[6], 3); + EXPECT_EQ(partition->get_range_ranks()[7], 3); + EXPECT_EQ(partition->get_range_ranks()[8], 3); + EXPECT_EQ(partition->get_range_ranks()[9], 4); + EXPECT_EQ(partition->get_part_sizes()[0], 5); + EXPECT_EQ(partition->get_part_sizes()[1], 6); + EXPECT_EQ(partition->get_part_sizes()[2], 5); +} + + +TYPED_TEST(Partition, BuildsFromRanges) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; + + auto partition = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + + EXPECT_EQ(partition->get_size(), + ranges.get_const_data()[ranges.get_num_elems() - 1]); + EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); + 
EXPECT_EQ(partition->get_const_range_bounds(),
+              partition->get_range_bounds());
+    EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids());
+    EXPECT_EQ(partition->get_const_range_bounds()[0], 0);
+    EXPECT_EQ(partition->get_const_range_bounds()[1], 5);
+    EXPECT_EQ(partition->get_const_range_bounds()[2], 5);
+    EXPECT_EQ(partition->get_const_range_bounds()[3], 7);
+    EXPECT_EQ(partition->get_const_range_bounds()[4], 9);
+    EXPECT_EQ(partition->get_const_range_bounds()[5], 10);
+    EXPECT_EQ(partition->get_part_ids()[0], 0);
+    EXPECT_EQ(partition->get_part_ids()[1], 1);
+    EXPECT_EQ(partition->get_part_ids()[2], 2);
+    EXPECT_EQ(partition->get_part_ids()[3], 3);
+    EXPECT_EQ(partition->get_part_ids()[4], 4);
+    EXPECT_EQ(partition->get_range_ranks()[0], 0);
+    EXPECT_EQ(partition->get_range_ranks()[1], 0);
+    EXPECT_EQ(partition->get_range_ranks()[2], 0);
+    EXPECT_EQ(partition->get_range_ranks()[3], 0);
+    EXPECT_EQ(partition->get_range_ranks()[4], 0);
+    EXPECT_EQ(partition->get_part_sizes()[0], 5);
+    EXPECT_EQ(partition->get_part_sizes()[1], 0);
+    EXPECT_EQ(partition->get_part_sizes()[2], 2);
+    EXPECT_EQ(partition->get_part_sizes()[3], 2);
+    EXPECT_EQ(partition->get_part_sizes()[4], 1);
+}
+
+
+TYPED_TEST(Partition, IsConnected)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsConnectedUnordered)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsConnectedFail)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}},
+            3));
+
+    ASSERT_FALSE(part->is_connected());
+}
+
+
+TYPED_TEST(Partition, IsOrdered)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}},
+            3));
+
+    ASSERT_TRUE(part->is_ordered());
+}
+
+
+TYPED_TEST(Partition, IsOrderedFail)
+{
+    using local_index_type = typename TestFixture::local_index_type;
+    auto part = gko::share(
+        gko::distributed::Partition::build_from_mapping(
+            this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}},
+            3));
+
+    ASSERT_FALSE(part->is_ordered());
+}
+
+
+} // namespace
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e2a07ca43a2..a5f43b2faab 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,6 @@
 include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake)
 
 add_subdirectory(components)
+add_subdirectory(distributed)
 add_subdirectory(matrix)
-add_subdirectory(solver)
\ No newline at end of file
+add_subdirectory(solver)
diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt
new file mode 100644
index 00000000000..6c9305372cc
--- /dev/null
+++ b/test/distributed/CMakeLists.txt
@@ -0,0 +1 @@
+ginkgo_create_common_test(partition_kernels)
diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp
new file mode 100644
index 00000000000..ec2ea52600a
--- /dev/null
+++ b/test/distributed/partition_kernels.cpp
@@ -0,0 +1,314 @@
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_kernels.hpp" +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +namespace { + + +using global_index_type = gko::distributed::global_index_type; +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Partition : public ::testing::Test { +protected: + using local_index_type = LocalIndexType; + Partition() : rand_engine(96457) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + init_executor(ref, exec); + } + + void TearDown() + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } + + void assert_equal( + std::unique_ptr>& part, + std::unique_ptr>& dpart) + { + ASSERT_EQ(part->get_size(), dpart->get_size()); + ASSERT_EQ(part->get_num_ranges(), dpart->get_num_ranges()); + ASSERT_EQ(part->get_num_parts(), dpart->get_num_parts()); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view(this->ref, part->get_num_ranges() + 1, + part->get_range_bounds()), + gko::make_array_view(this->exec, dpart->get_num_ranges() + 1, + dpart->get_range_bounds())); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view(this->ref, part->get_num_ranges(), + part->get_part_ids()), + gko::make_array_view(this->exec, dpart->get_num_ranges(), + dpart->get_part_ids())); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view( + this->ref, part->get_num_ranges(), + const_cast(part->get_range_ranks())), + gko::make_array_view( + this->exec, dpart->get_num_ranges(), + const_cast(dpart->get_range_ranks()))); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view( + this->ref, part->get_num_parts(), + const_cast(part->get_part_sizes())), + gko::make_array_view( + this->exec, dpart->get_num_parts(), + const_cast(dpart->get_part_sizes()))); + } + + std::ranlux48 rand_engine; + + std::shared_ptr ref; + std::shared_ptr exec; +}; + 
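The fixture's assert_equal relies on the make_array_view helper introduced in
include/ginkgo/core/base/array.hpp above, which only deduces the element type
and forwards to Array::view, so the result is a non-owning view and the
pointer must outlive it. A minimal sketch with hypothetical data (assuming a
ReferenceExecutor `ref` as in the fixture):

    gko::int64 bounds[] = {0, 4, 7, 10};
    // Deduces gko::Array<gko::int64>; no copy of `bounds` is made.
    auto view = gko::make_array_view(ref, 4, bounds);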
+TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); + + +TYPED_TEST(Partition, BuildsFromMapping) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + std::uniform_int_distribution part_dist{0, num_parts - 1}; + gko::Array mapping{this->ref, 10000}; + for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { + mapping.get_data()[i] = part_dist(this->rand_engine); + } + gko::Array dmapping{this->exec, mapping}; + + auto part = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + // skip part 0 + std::uniform_int_distribution part_dist{1, num_parts - 1}; + gko::Array mapping{this->ref, 10000}; + for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { + mapping.get_data()[i] = part_dist(this->rand_engine); + } + gko::Array dmapping{this->exec, mapping}; + + auto part = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + // return only part 1 + std::uniform_int_distribution part_dist{1, 1}; + gko::Array mapping{this->ref, 10000}; + for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { + mapping.get_data()[i] = part_dist(this->rand_engine); + } + gko::Array dmapping{this->exec, mapping}; + + auto part = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + gko::Array mapping{this->ref, 0}; + gko::Array dmapping{this->exec, 0}; + + auto part = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithOnePart) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 1; + gko::Array mapping{this->ref, 10000}; + mapping.fill(0); + gko::Array dmapping{this->exec, mapping}; + + auto part = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguous) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{this->ref, + {0, 1234, 3134, 4578, 16435, 60000}}; + gko::Array dranges{this->exec, ranges}; + + auto part = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + auto dpart = + gko::distributed::Partition::build_from_contiguous( + this->exec, dranges); + + this->assert_equal(part, dpart); +} + + 
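For orientation on the contiguous builders in this file: build_from_contiguous
treats its input as part boundaries, assigning part i the half-open global
range [ranges[i], ranges[i+1]), as the unified kernel earlier in this patch
spells out. For the array used above this gives (worked out by hand):

    // ranges = {0, 1234, 3134, 4578, 16435, 60000}
    // part 0 -> [0, 1234)       size  1234
    // part 1 -> [1234, 3134)    size  1900
    // part 2 -> [3134, 4578)    size  1444
    // part 3 -> [4578, 16435)   size 11857
    // part 4 -> [16435, 60000)  size 43565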
+TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{ + this->ref, {0, 1234, 3134, 3134, 4578, 16435, 16435, 60000}}; + gko::Array dranges{this->exec, ranges}; + + auto part = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + auto dpart = + gko::distributed::Partition::build_from_contiguous( + this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithSomeMostlyEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{ + this->ref, {0, 0, 3134, 4578, 4578, 4578, 4578, 4578}}; + gko::Array dranges{this->exec, ranges}; + + auto part = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + auto dpart = + gko::distributed::Partition::build_from_contiguous( + this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{this->ref, {0, 0, 0, 0, 0, 0, 0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + auto dpart = + gko::distributed::Partition::build_from_contiguous( + this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array ranges{this->ref, {0, 0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = + gko::distributed::Partition::build_from_contiguous( + this->ref, ranges); + auto dpart = + gko::distributed::Partition::build_from_contiguous( + this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +} // namespace From 8b0982011d46db804d037d8c1c843e9b83ae2c99 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 25 Oct 2021 10:58:07 +0200 Subject: [PATCH 48/59] use kernel stubs --- core/device_hooks/common_kernels.inc.cpp | 28 +++++------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 6ff9896e06a..987e84fcd60 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -214,29 +214,11 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); namespace partition { -GKO_PARTITION_COUNT_RANGES -GKO_NOT_COMPILED(GKO_HOOK_MODULE); - -template -GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); - -template -GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); - -template -GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); - -template -GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); +GKO_STUB(GKO_PARTITION_COUNT_RANGES); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); 
+GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition From 94624ced1ab2621e02c062e916b125bf1812569f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 26 Oct 2021 14:22:13 +0200 Subject: [PATCH 49/59] rename range_ranks to range_starting_indices --- core/device_hooks/common_kernels.inc.cpp | 2 +- core/distributed/partition.cpp | 13 +++---- core/distributed/partition_kernels.hpp | 34 +++++++++---------- cuda/distributed/partition_kernels.cu | 12 ++++--- dpcpp/distributed/partition_kernels.dp.cpp | 12 ++++--- hip/distributed/partition_kernels.hip.cpp | 12 ++++--- include/ginkgo/core/distributed/partition.hpp | 31 +++++++++++------ omp/distributed/partition_kernels.cpp | 12 ++++--- omp/test/distributed/partition_kernels.cpp | 30 ++++++++-------- reference/distributed/partition_kernels.cpp | 12 ++++--- .../test/distributed/partition_kernels.cpp | 30 ++++++++-------- test/distributed/partition_kernels.cpp | 12 +++---- 12 files changed, 116 insertions(+), 96 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 987e84fcd60..39f814f4614 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -217,7 +217,7 @@ namespace partition { GKO_STUB(GKO_PARTITION_COUNT_RANGES); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index e7d9e772340..72a6edb63a1 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -44,7 +44,8 @@ namespace partition { GKO_REGISTER_OPERATION(count_ranges, partition::count_ranges); GKO_REGISTER_OPERATION(build_from_mapping, partition::build_from_mapping); GKO_REGISTER_OPERATION(build_from_contiguous, partition::build_from_contiguous); -GKO_REGISTER_OPERATION(build_ranks, partition::build_ranks); +GKO_REGISTER_OPERATION(build_starting_indices, + partition::build_starting_indices); GKO_REGISTER_OPERATION(is_ordered, partition::is_ordered); @@ -63,7 +64,7 @@ Partition::build_from_mapping( auto result = Partition::create(exec, num_parts, num_ranges); exec->run( partition::make_build_from_mapping(*local_mapping.get(), result.get())); - result->compute_range_ranks(); + result->compute_range_starting_indices(); return result; } @@ -80,18 +81,18 @@ Partition::build_from_contiguous( ranges.get_num_elems() - 1); exec->run(partition::make_build_from_contiguous(*local_ranges.get(), result.get())); - result->compute_range_ranks(); + result->compute_range_starting_indices(); return result; } template -void Partition::compute_range_ranks() +void Partition::compute_range_starting_indices() { auto exec = offsets_.get_executor(); - exec->run(partition::make_build_ranks( + exec->run(partition::make_build_starting_indices( offsets_.get_const_data(), part_ids_.get_const_data(), get_num_ranges(), - get_num_parts(), ranks_.get_data(), part_sizes_.get_data())); + get_num_parts(), starting_indices_.get_data(), part_sizes_.get_data())); } diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 32df9a79dc5..2d316371ed1 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -60,12 +60,12 @@ namespace kernels { const Array& 
mapping, \ distributed::Partition* partition) -#define GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType) \ - void build_ranks(std::shared_ptr exec, \ - const global_index_type* range_offsets, \ - const int* range_parts, size_type num_ranges, \ - int num_parts, LocalIndexType* ranks, \ - LocalIndexType* sizes) +#define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType) \ + void build_starting_indices(std::shared_ptr exec, \ + const global_index_type* range_offsets, \ + const int* range_parts, size_type num_ranges, \ + int num_parts, LocalIndexType* ranks, \ + LocalIndexType* sizes) #define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ void is_ordered(std::shared_ptr exec, \ @@ -73,17 +73,17 @@ namespace kernels { bool* result) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - using global_index_type = distributed::global_index_type; \ - using comm_index_type = distributed::comm_index_type; \ - GKO_PARTITION_COUNT_RANGES; \ - template \ - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType); \ - template \ - GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ - template \ - GKO_DECLARE_PARTITION_BUILD_RANKS(LocalIndexType); \ - template \ +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using global_index_type = distributed::global_index_type; \ + using comm_index_type = distributed::comm_index_type; \ + GKO_PARTITION_COUNT_RANGES; \ + template \ + GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType); \ + template \ + GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ + template \ + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType); \ + template \ GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition, diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index d5be073f182..5b6b257c092 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -52,10 +52,11 @@ namespace partition { template -void build_ranks(std::shared_ptr exec, - const global_index_type* range_offsets, const int* range_parts, - size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) +void build_starting_indices(std::shared_ptr exec, + const global_index_type* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) { Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end @@ -112,7 +113,8 @@ void build_ranks(std::shared_ptr exec, num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); template diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 114f524ba64..11d8d8e2bea 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -40,10 +40,11 @@ namespace partition { template -void build_ranks(std::shared_ptr exec, - const global_index_type* range_offsets, const int* range_parts, - size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) +void build_starting_indices(std::shared_ptr exec, + const global_index_type* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) { Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end @@ -96,7 +97,8 @@ void 
build_ranks(std::shared_ptr exec,
     num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes);
 }
 
-GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+    GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
 
 
 template
diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp
index f49e097d979..e7920f19dae 100644
--- a/hip/distributed/partition_kernels.hip.cpp
+++ b/hip/distributed/partition_kernels.hip.cpp
@@ -52,10 +52,11 @@ namespace partition {
 
 template
-void build_ranks(std::shared_ptr exec,
-                 const global_index_type* range_offsets, const int* range_parts,
-                 size_type num_ranges, int num_parts, LocalIndexType* ranks,
-                 LocalIndexType* sizes)
+void build_starting_indices(std::shared_ptr exec,
+                            const global_index_type* range_offsets,
+                            const int* range_parts, size_type num_ranges,
+                            int num_parts, LocalIndexType* ranks,
+                            LocalIndexType* sizes)
 {
     Array range_sizes{exec, num_ranges};
     // num_parts sentinel at the end
@@ -112,7 +113,8 @@ void build_ranks(std::shared_ptr exec,
     num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes);
 }
 
-GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS);
+GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+    GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
 
 
 template
diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp
index 3f10eb3cc5b..220c5b8225a 100644
--- a/include/ginkgo/core/distributed/partition.hpp
+++ b/include/ginkgo/core/distributed/partition.hpp
@@ -126,19 +126,25 @@ class Partition : public EnablePolymorphicObject>,
     comm_index_type* get_part_ids() { return part_ids_.get_data(); }
 
     /**
-     * Compute the range_ranks and part_sizes based on the current range_bounds
-     * and part_ids.
+     * Compute the range_starting_indices and part_sizes based on the current
+     * range_bounds and part_ids.
      */
-    void compute_range_ranks();
+    void compute_range_starting_indices();
 
     /**
-     * Returns the part-local base index for each range in this partition.
-     * range_ranks[i]
-     * These values can only be used after compute_range_ranks() was executed.
+     * Returns the part-local starting index for each range in this partition.
+     *
+     * Consider the partition on `[0, 10)` with
+     * > p_1 = [0-3, 7-9],
+     * > p_2 = [4-6].
+     * Then `range_starting_indices[0] = 0`, `range_starting_indices[1] = 0`,
+     * `range_starting_indices[2] = 4`.
+     *
+     * @note These values can only be used after
+     * compute_range_starting_indices() was executed.
*/ - const local_index_type* get_range_ranks() const + const local_index_type* get_range_starting_indices() const { - return ranks_.get_const_data(); + return starting_indices_.get_const_data(); } /** @@ -204,17 +210,20 @@ class Partition : public EnablePolymorphicObject>, : EnablePolymorphicObject{exec}, num_parts_{num_parts}, offsets_{exec, num_ranges + 1}, - ranks_{exec, num_ranges}, + starting_indices_{exec, num_ranges}, part_sizes_{exec, static_cast(num_parts)}, part_ids_{exec, num_ranges} { - // TODO zero out contents + offsets_.fill(0); + starting_indices_.fill(0); + part_sizes_.fill(0); + part_ids_.fill(0); } private: comm_index_type num_parts_; Array offsets_; - Array ranks_; + Array starting_indices_; Array part_sizes_; Array part_ids_; }; diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index e4a8d3bdf97..32e5ecf569e 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -46,10 +46,11 @@ namespace partition { template -void build_ranks(std::shared_ptr exec, - const global_index_type* range_offsets, const int* range_parts, - size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) +void build_starting_indices(std::shared_ptr exec, + const global_index_type* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) { std::fill_n(sizes, num_parts, 0); auto num_threads = static_cast(omp_get_max_threads()); @@ -90,7 +91,8 @@ void build_ranks(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); template diff --git a/omp/test/distributed/partition_kernels.cpp b/omp/test/distributed/partition_kernels.cpp index bdc9f5ecb18..0e04bc9b88c 100644 --- a/omp/test/distributed/partition_kernels.cpp +++ b/omp/test/distributed/partition_kernels.cpp @@ -107,16 +107,16 @@ TYPED_TEST(Partition, BuildsFromMapping) EXPECT_EQ(partition->get_part_ids()[7], 1); EXPECT_EQ(partition->get_part_ids()[8], 2); EXPECT_EQ(partition->get_part_ids()[9], 0); - EXPECT_EQ(partition->get_range_ranks()[0], 0); - EXPECT_EQ(partition->get_range_ranks()[1], 0); - EXPECT_EQ(partition->get_range_ranks()[2], 0); - EXPECT_EQ(partition->get_range_ranks()[3], 2); - EXPECT_EQ(partition->get_range_ranks()[4], 1); - EXPECT_EQ(partition->get_range_ranks()[5], 2); - EXPECT_EQ(partition->get_range_ranks()[6], 3); - EXPECT_EQ(partition->get_range_ranks()[7], 3); - EXPECT_EQ(partition->get_range_ranks()[8], 3); - EXPECT_EQ(partition->get_range_ranks()[9], 4); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 2); + EXPECT_EQ(partition->get_range_starting_indices()[4], 1); + EXPECT_EQ(partition->get_range_starting_indices()[5], 2); + EXPECT_EQ(partition->get_range_starting_indices()[6], 3); + EXPECT_EQ(partition->get_range_starting_indices()[7], 3); + EXPECT_EQ(partition->get_range_starting_indices()[8], 3); + EXPECT_EQ(partition->get_range_starting_indices()[9], 4); EXPECT_EQ(partition->get_part_sizes()[0], 5); EXPECT_EQ(partition->get_part_sizes()[1], 6); EXPECT_EQ(partition->get_part_sizes()[2], 5); @@ -150,11 +150,11 @@ TYPED_TEST(Partition, BuildsFromRanges) EXPECT_EQ(partition->get_part_ids()[2], 2); EXPECT_EQ(partition->get_part_ids()[3], 
3); EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_ranks()[0], 0); - EXPECT_EQ(partition->get_range_ranks()[1], 0); - EXPECT_EQ(partition->get_range_ranks()[2], 0); - EXPECT_EQ(partition->get_range_ranks()[3], 0); - EXPECT_EQ(partition->get_range_ranks()[4], 0); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 0); + EXPECT_EQ(partition->get_range_starting_indices()[4], 0); EXPECT_EQ(partition->get_part_sizes()[0], 5); EXPECT_EQ(partition->get_part_sizes()[1], 0); EXPECT_EQ(partition->get_part_sizes()[2], 2); diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index 28357f64179..cf12dbfe31f 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -94,10 +94,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); template -void build_ranks(std::shared_ptr exec, - const global_index_type* range_offsets, const int* range_parts, - size_type num_ranges, int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) +void build_starting_indices(std::shared_ptr exec, + const global_index_type* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, LocalIndexType* ranks, + LocalIndexType* sizes) { std::fill_n(sizes, num_parts, 0); for (size_type range = 0; range < num_ranges; ++range) { @@ -110,7 +111,8 @@ void build_ranks(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_RANKS); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); template void is_ordered(std::shared_ptr exec, diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index bdc9f5ecb18..0e04bc9b88c 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -107,16 +107,16 @@ TYPED_TEST(Partition, BuildsFromMapping) EXPECT_EQ(partition->get_part_ids()[7], 1); EXPECT_EQ(partition->get_part_ids()[8], 2); EXPECT_EQ(partition->get_part_ids()[9], 0); - EXPECT_EQ(partition->get_range_ranks()[0], 0); - EXPECT_EQ(partition->get_range_ranks()[1], 0); - EXPECT_EQ(partition->get_range_ranks()[2], 0); - EXPECT_EQ(partition->get_range_ranks()[3], 2); - EXPECT_EQ(partition->get_range_ranks()[4], 1); - EXPECT_EQ(partition->get_range_ranks()[5], 2); - EXPECT_EQ(partition->get_range_ranks()[6], 3); - EXPECT_EQ(partition->get_range_ranks()[7], 3); - EXPECT_EQ(partition->get_range_ranks()[8], 3); - EXPECT_EQ(partition->get_range_ranks()[9], 4); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 2); + EXPECT_EQ(partition->get_range_starting_indices()[4], 1); + EXPECT_EQ(partition->get_range_starting_indices()[5], 2); + EXPECT_EQ(partition->get_range_starting_indices()[6], 3); + EXPECT_EQ(partition->get_range_starting_indices()[7], 3); + EXPECT_EQ(partition->get_range_starting_indices()[8], 3); + EXPECT_EQ(partition->get_range_starting_indices()[9], 4); EXPECT_EQ(partition->get_part_sizes()[0], 5); EXPECT_EQ(partition->get_part_sizes()[1], 6); EXPECT_EQ(partition->get_part_sizes()[2], 5); @@ -150,11 +150,11 @@ 
TYPED_TEST(Partition, BuildsFromRanges) EXPECT_EQ(partition->get_part_ids()[2], 2); EXPECT_EQ(partition->get_part_ids()[3], 3); EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_ranks()[0], 0); - EXPECT_EQ(partition->get_range_ranks()[1], 0); - EXPECT_EQ(partition->get_range_ranks()[2], 0); - EXPECT_EQ(partition->get_range_ranks()[3], 0); - EXPECT_EQ(partition->get_range_ranks()[4], 0); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 0); + EXPECT_EQ(partition->get_range_starting_indices()[4], 0); EXPECT_EQ(partition->get_part_sizes()[0], 5); EXPECT_EQ(partition->get_part_sizes()[1], 0); EXPECT_EQ(partition->get_part_sizes()[2], 2); diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index ec2ea52600a..bca4e1e9853 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -94,12 +94,12 @@ class Partition : public ::testing::Test { gko::make_array_view(this->exec, dpart->get_num_ranges(), dpart->get_part_ids())); GKO_ASSERT_ARRAY_EQ( - gko::make_array_view( - this->ref, part->get_num_ranges(), - const_cast(part->get_range_ranks())), - gko::make_array_view( - this->exec, dpart->get_num_ranges(), - const_cast(dpart->get_range_ranks()))); + gko::make_array_view(this->ref, part->get_num_ranges(), + const_cast( + part->get_range_starting_indices())), + gko::make_array_view(this->exec, dpart->get_num_ranges(), + const_cast( + dpart->get_range_starting_indices()))); GKO_ASSERT_ARRAY_EQ( gko::make_array_view( this->ref, part->get_num_parts(), From 382eeb08d15668c84db70bd407fb66939a0f9655 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 26 Oct 2021 16:24:10 +0200 Subject: [PATCH 50/59] add partition builder from global size --- .../unified/distributed/partition_kernels.cpp | 20 ++++ core/device_hooks/common_kernels.inc.cpp | 1 + core/distributed/partition.cpp | 15 +++ core/distributed/partition_kernels.hpp | 7 ++ include/ginkgo/core/distributed/partition.hpp | 13 +++ reference/distributed/partition_kernels.cpp | 21 ++++ .../test/distributed/partition_kernels.cpp | 104 ++++++++++++++++++ test/distributed/partition_kernels.cpp | 51 +++++++++ 8 files changed, 232 insertions(+) diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 52e2b8d153e..45595467d36 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -122,6 +122,26 @@ void build_from_mapping(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +template +void build_ranges_from_global_size(std::shared_ptr exec, + int num_parts, int64 global_size, + Array& ranges) +{ + const auto size_per_part = global_size / num_parts; + const auto rest = global_size - (num_parts * size_per_part); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto size_per_part, auto rest, auto ranges) { + ranges[i] = size_per_part + static_cast(i < rest); + }, + ranges.get_num_elems() - 1, size_per_part, rest, ranges.get_data()); + components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); + + } // namespace partition } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git 
a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 39f814f4614..a0a8893c952 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -218,6 +218,7 @@ GKO_STUB(GKO_PARTITION_COUNT_RANGES); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 72a6edb63a1..d93e836b0ea 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -44,6 +44,8 @@ namespace partition { GKO_REGISTER_OPERATION(count_ranges, partition::count_ranges); GKO_REGISTER_OPERATION(build_from_mapping, partition::build_from_mapping); GKO_REGISTER_OPERATION(build_from_contiguous, partition::build_from_contiguous); +GKO_REGISTER_OPERATION(build_ranges_from_global_size, + partition::build_ranges_from_global_size); GKO_REGISTER_OPERATION(build_starting_indices, partition::build_starting_indices); GKO_REGISTER_OPERATION(is_ordered, partition::is_ordered); @@ -86,6 +88,19 @@ Partition::build_from_contiguous( } +template +std::unique_ptr> +Partition::build_from_global_size( + std::shared_ptr exec, comm_index_type num_parts, + global_index_type global_size) +{ + Array ranges(exec, num_parts + 1); + exec->run(partition::make_build_ranges_from_global_size( + num_parts, global_size, ranges)); + return Partition::build_from_contiguous(exec, ranges); +} + + template void Partition::compute_range_starting_indices() { diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 2d316371ed1..4ce6025d066 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -60,6 +60,11 @@ namespace kernels { const Array& mapping, \ distributed::Partition* partition) +#define GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE(LocalIndexType) \ + void build_ranges_from_global_size( \ + std::shared_ptr exec, int num_parts, \ + int64 global_size, Array& ranges) + #define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType) \ void build_starting_indices(std::shared_ptr exec, \ const global_index_type* range_offsets, \ @@ -82,6 +87,8 @@ namespace kernels { template \ GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ template \ + GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE(LocalIndexType); \ + template \ GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType); \ template \ GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 220c5b8225a..26078de8789 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -201,6 +201,19 @@ class Partition : public EnablePolymorphicObject>, std::shared_ptr exec, const Array& ranges); + /** + * Builds a partition by evenly distributing the global range. + * @param exec the Executor on which the partition should be built + * @param num_parts the number of parts used in this partition + * @param global_size the global size of this partition + * @return a Partition where each range has either + * `floor(global_size/num_parts)` or `floor(global_size/num_parts) + 1` + * indices.
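+ * For example, num_parts = 5 and global_size = 13 yield the range + * boundaries [0, 3, 6, 9, 11, 13], i.e. part sizes 3, 3, 3, 2, 2.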
+ */ + static std::unique_ptr build_from_global_size( + std::shared_ptr exec, comm_index_type num_parts, + global_index_type global_size); + /** * Creates a partition stored on the given executor with the given number of * consecutive ranges and parts. diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index cf12dbfe31f..d2fbcf433ee 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -93,6 +93,27 @@ void build_from_mapping(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +template +void build_ranges_from_global_size(std::shared_ptr exec, + int num_parts, int64 global_size, + Array& ranges) +{ + const auto size_per_part = global_size / num_parts; + const auto rest = global_size - (num_parts * size_per_part); + + auto* ranges_ptr = ranges.get_data(); + + ranges_ptr[0] = 0; + for (int i = 1; i < num_parts + 1; ++i) { + ranges_ptr[i] = ranges_ptr[i - 1] + size_per_part + + static_cast((i - 1) < rest); + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); + + template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 0e04bc9b88c..ed80e5ff0a4 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -162,6 +162,110 @@ TYPED_TEST(Partition, BuildsFromRanges) EXPECT_EQ(partition->get_part_sizes()[4], 1); } +TYPED_TEST(Partition, BuildsFromGlobalSize) +{ + using local_index_type = typename TestFixture::local_index_type; + + auto partition = + gko::distributed::Partition::build_from_global_size( + this->ref, 5, 13); + + EXPECT_EQ(partition->get_size(), 13); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 3); + EXPECT_EQ(partition->get_const_range_bounds()[2], 6); + EXPECT_EQ(partition->get_const_range_bounds()[3], 9); + EXPECT_EQ(partition->get_const_range_bounds()[4], 11); + EXPECT_EQ(partition->get_const_range_bounds()[5], 13); + EXPECT_EQ(partition->get_part_ids()[0], 0); + EXPECT_EQ(partition->get_part_ids()[1], 1); + EXPECT_EQ(partition->get_part_ids()[2], 2); + EXPECT_EQ(partition->get_part_ids()[3], 3); + EXPECT_EQ(partition->get_part_ids()[4], 4); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 0); + EXPECT_EQ(partition->get_range_starting_indices()[4], 0); + EXPECT_EQ(partition->get_part_sizes()[0], 3); + EXPECT_EQ(partition->get_part_sizes()[1], 3); + EXPECT_EQ(partition->get_part_sizes()[2], 3); + EXPECT_EQ(partition->get_part_sizes()[3], 2); + EXPECT_EQ(partition->get_part_sizes()[4], 2); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) +{ + using local_index_type = typename TestFixture::local_index_type; + + auto partition = + gko::distributed::Partition::build_from_global_size( + this->ref, 5, 0); + + EXPECT_EQ(partition->get_size(), 0); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + 
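// an empty global size yields all-zero range bounds, i.e. every part is empty +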
EXPECT_EQ(partition->get_const_range_bounds()[1], 0); + EXPECT_EQ(partition->get_const_range_bounds()[2], 0); + EXPECT_EQ(partition->get_const_range_bounds()[3], 0); + EXPECT_EQ(partition->get_const_range_bounds()[4], 0); + EXPECT_EQ(partition->get_const_range_bounds()[5], 0); + EXPECT_EQ(partition->get_part_ids()[0], 0); + EXPECT_EQ(partition->get_part_ids()[1], 1); + EXPECT_EQ(partition->get_part_ids()[2], 2); + EXPECT_EQ(partition->get_part_ids()[3], 3); + EXPECT_EQ(partition->get_part_ids()[4], 4); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 0); + EXPECT_EQ(partition->get_range_starting_indices()[4], 0); + EXPECT_EQ(partition->get_part_sizes()[0], 0); + EXPECT_EQ(partition->get_part_sizes()[1], 0); + EXPECT_EQ(partition->get_part_sizes()[2], 0); + EXPECT_EQ(partition->get_part_sizes()[3], 0); + EXPECT_EQ(partition->get_part_sizes()[4], 0); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + + auto partition = + gko::distributed::Partition::build_from_global_size( + this->ref, 5, 3); + + EXPECT_EQ(partition->get_size(), 3); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 1); + EXPECT_EQ(partition->get_const_range_bounds()[2], 2); + EXPECT_EQ(partition->get_const_range_bounds()[3], 3); + EXPECT_EQ(partition->get_const_range_bounds()[4], 3); + EXPECT_EQ(partition->get_const_range_bounds()[5], 3); + EXPECT_EQ(partition->get_part_ids()[0], 0); + EXPECT_EQ(partition->get_part_ids()[1], 1); + EXPECT_EQ(partition->get_part_ids()[2], 2); + EXPECT_EQ(partition->get_part_ids()[3], 3); + EXPECT_EQ(partition->get_part_ids()[4], 4); + EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 0); + EXPECT_EQ(partition->get_range_starting_indices()[4], 0); + EXPECT_EQ(partition->get_part_sizes()[0], 1); + EXPECT_EQ(partition->get_part_sizes()[1], 1); + EXPECT_EQ(partition->get_part_sizes()[2], 1); + EXPECT_EQ(partition->get_part_sizes()[3], 0); + EXPECT_EQ(partition->get_part_sizes()[4], 0); +} + TYPED_TEST(Partition, IsConnected) { diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index bca4e1e9853..9a9f6283fd1 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -311,4 +311,55 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) } +TYPED_TEST(Partition, BuildsFromGlobalSize) +{ + using local_index_type = typename TestFixture::local_index_type; + const int num_parts = 7; + const global_index_type global_size = 708; + + auto part = + gko::distributed::Partition::build_from_global_size( + this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition::build_from_global_size( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) +{ + using local_index_type = typename TestFixture::local_index_type; + const int num_parts = 7; + const global_index_type global_size = 0; + + auto part = + 
gko::distributed::Partition::build_from_global_size( + this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition::build_from_global_size( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) +{ + using local_index_type = typename TestFixture::local_index_type; + const int num_parts = 77; + const global_index_type global_size = 13; + + auto part = + gko::distributed::Partition::build_from_global_size( + this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition::build_from_global_size( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + } // namespace From 7219260b167c1a7a8537d0bfc66b348a6fbf252e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 27 Oct 2021 10:26:42 +0200 Subject: [PATCH 51/59] move distributed typedefs to a centralized place --- include/ginkgo/core/base/types.hpp | 18 ++++++++++++++++++ include/ginkgo/core/distributed/partition.hpp | 5 +---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index d30739d5161..81a69ee8a0f 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -699,6 +699,24 @@ inline constexpr GKO_ATTRIBUTES IndexType invalid_index() } +namespace distributed { + + +/** + * Index type for global indices in a distributed system + */ +using global_index_type = int64; + + +/** + * Index type for enumerating processors + * + * Conforms to the MPI C interface of e.g. MPI rank or size + */ +using comm_index_type = int; + + +} // namespace distributed } // namespace gko diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 26078de8789..0840f3876c8 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -36,16 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include namespace gko { namespace distributed { -using global_index_type = int64; -using comm_index_type = int; - - /** * Represents a partition of a range of indices [0, size) into a disjoint set of * parts. 
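* For instance, with two parts, part 0 might own the index ranges [0, 4) and [7, 10) while part 1 owns [4, 7).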
The partition is stored as a set of consecutive ranges [begin, end) From fa194faafc4346387b2d794680de23eb3a37448f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 27 Oct 2021 17:03:33 +0200 Subject: [PATCH 52/59] review updates - renaming - update kernels - add number of empty parts Co-authored-by: Tobias Ribizel Co-authored-by: Yu-Hsiang Tsai --- .../unified/distributed/partition_kernels.cpp | 27 ++-- core/distributed/partition.cpp | 7 +- core/distributed/partition_kernels.hpp | 4 +- cuda/distributed/partition_kernels.cu | 13 +- dpcpp/distributed/partition_kernels.dp.cpp | 11 +- hip/distributed/partition_kernels.hip.cpp | 13 +- include/ginkgo/core/distributed/partition.hpp | 51 ++-- omp/distributed/partition_kernels.cpp | 13 +- omp/test/CMakeLists.txt | 1 - omp/test/distributed/CMakeLists.txt | 1 - omp/test/distributed/partition_kernels.cpp | 226 ------------------ reference/distributed/partition_kernels.cpp | 5 +- .../test/distributed/partition_kernels.cpp | 82 ++++++- test/distributed/partition_kernels.cpp | 94 ++++++-- 14 files changed, 237 insertions(+), 311 deletions(-) delete mode 100644 omp/test/distributed/CMakeLists.txt delete mode 100644 omp/test/distributed/partition_kernels.cpp diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 45595467d36..11cefbfac13 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -132,7 +132,7 @@ void build_ranges_from_global_size(std::shared_ptr exec, run_kernel( exec, [] GKO_KERNEL(auto i, auto size_per_part, auto rest, auto ranges) { - ranges[i] = size_per_part + static_cast(i < rest); + ranges[i] = size_per_part + (i < rest ? 1 : 0); }, ranges.get_num_elems() - 1, size_per_part, rest, ranges.get_data()); components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems()); @@ -142,6 +142,31 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); +template +void is_ordered(std::shared_ptr exec, + const distributed::Partition* partition, + bool* result) +{ + const auto part_ids = partition->get_const_part_ids(); + const auto num_ranges = partition->get_num_ranges(); + // it is necessary to use uint32 as a temporary result, since + // bool can't be used with shuffles + Array result_uint32{exec, 1}; + run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, const auto part_ids) { + return static_cast(part_ids[i] < part_ids[i + 1]); + }, + [] GKO_KERNEL(const auto a, const auto b) { return a && b; }, + [] GKO_KERNEL(const auto a) { return a; }, uint32(1), + result_uint32.get_data(), num_ranges - 1, part_ids); + *result = static_cast( + exec->template copy_val_to_host(result_uint32.get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + + } // namespace partition } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index d93e836b0ea..0328b225db3 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -90,7 +90,7 @@ Partition::build_from_contiguous( template std::unique_ptr> -Partition::build_from_global_size( +Partition::build_from_global_size_uniform( std::shared_ptr exec, comm_index_type num_parts, global_index_type global_size) { @@ -107,14 +107,15 @@ void Partition::compute_range_starting_indices() auto exec = offsets_.get_executor(); exec->run(partition::make_build_starting_indices( offsets_.get_const_data(), 
part_ids_.get_const_data(), get_num_ranges(), - get_num_parts(), starting_indices_.get_data(), part_sizes_.get_data())); + get_num_parts(), num_empty_parts_, starting_indices_.get_data(), + part_sizes_.get_data())); } template bool Partition::is_connected() { - return get_num_parts() == get_num_ranges(); + return get_num_parts() - get_num_empty_parts() == get_num_ranges(); } diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 4ce6025d066..223991ad878 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -69,8 +69,8 @@ namespace kernels { void build_starting_indices(std::shared_ptr exec, \ const global_index_type* range_offsets, \ const int* range_parts, size_type num_ranges, \ - int num_parts, LocalIndexType* ranks, \ - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, \ + LocalIndexType* ranks, LocalIndexType* sizes) #define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ void is_ordered(std::shared_ptr exec, \ diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index 5b6b257c092..d6827f32540 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -55,8 +55,8 @@ template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, - int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) { Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end @@ -111,19 +111,14 @@ void build_starting_indices(std::shared_ptr exec, : LocalIndexType{}; }, num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); + num_empty_parts = + thrust::count(thrust::device, sizes, sizes + num_parts, 0); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); - - } // namespace partition } // namespace cuda } // namespace kernels diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 11d8d8e2bea..7d8b0f9f2f6 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -43,8 +43,8 @@ template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, - int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) { Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end @@ -78,6 +78,7 @@ void build_starting_indices(std::shared_ptr exec, // TODO compute prefix_sum again // write back the results // TODO this needs to be adapted to the output of the algorithm above + // TODO count number of zeros in size and store in num_empty_parts run_kernel( exec, [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, @@ -101,12 +102,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); - } // namespace partition } 
// namespace dpcpp } // namespace kernels diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index e7920f19dae..f64442e093f 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -55,8 +55,8 @@ template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, - int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) { Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end @@ -111,19 +111,14 @@ void build_starting_indices(std::shared_ptr exec, : LocalIndexType{}; }, num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); + num_empty_parts = + thrust::count(thrust::device, sizes, sizes + num_parts, 0); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); - - } // namespace partition } // namespace hip } // namespace kernels diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 0840f3876c8..0c7b5ad3530 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -85,19 +85,30 @@ class Partition : public EnablePolymorphicObject>, * Returns the number of ranges stored by this partition. * This size refers to the data returned by get_range_bounds(). */ - size_type get_num_ranges() const { return offsets_.get_num_elems() - 1; } + size_type get_num_ranges() const noexcept + { + return offsets_.get_num_elems() - 1; + } /** * Returns the number of parts represented in this partition. */ - comm_index_type get_num_parts() const { return num_parts_; } + comm_index_type get_num_parts() const noexcept { return num_parts_; } + + /** + * Returns the number of empty parts within this partition. + */ + comm_index_type get_num_empty_parts() const noexcept + { + return num_empty_parts_; + } /** * Returns the ranges boundary array stored by this partition. * `range_bounds[i]` is the beginning (inclusive) and * `range_bounds[i + 1]` is the end (exclusive) of the ith range. */ - const global_index_type* get_const_range_bounds() const + const global_index_type* get_const_range_bounds() const noexcept { return offsets_.get_const_data(); } @@ -105,14 +116,17 @@ class Partition : public EnablePolymorphicObject>, /** * @copydoc get_const_range_bounds() */ - global_index_type* get_range_bounds() { return offsets_.get_data(); } + global_index_type* get_range_bounds() noexcept + { + return offsets_.get_data(); + } /** * Returns the part ID array stored by this partition. * For each range from get_range_bounds(), it stores the part ID in the * range [0, get_num_parts() - 1]. */ - const comm_index_type* get_const_part_ids() const + const comm_index_type* get_const_part_ids() const noexcept { return part_ids_.get_const_data(); } @@ -132,14 +146,15 @@ class Partition : public EnablePolymorphicObject>, * Returns the part-local starting index for each range in this partition. * * Consider the partition on `[0, 10)` with - * > p_1 = [0-3, 7-9], - * > p_2 = [4-6]. - * Then `range_ranks[0] = 0`, `range_ranks[1] = 0`, `range_ranks[2] = 5`. - + * > p_1 = [0-4), [7-10), + * > p_2 = [4-7). 
+ * Then `range_starting_indices[0] = 0`, `range_starting_indices[1] = 0`, + `range_starting_indices[2] = 5`. + * * @note These values can only be used after compute_range_starting_indices() was executed. */ - const local_index_type* get_range_starting_indices() const + const local_index_type* get_range_starting_indices() const noexcept { return starting_indices_.get_const_data(); } @@ -148,7 +163,7 @@ * Returns the part size array. * part_sizes[p] stores the number of elements in part `p`. */ - const local_index_type* get_part_sizes() const + const local_index_type* get_part_sizes() const noexcept { return part_sizes_.get_const_data(); } @@ -164,12 +179,12 @@ } /** - * Checks if each part is associated with a contiguous range. + * Checks if each part is associated with at most one contiguous range. */ bool is_connected(); /** - * Checks if the ranges are orderd by their part index. + * Checks if the ranges are ordered by their part index. * * Implies that the partition is connected. */ @@ -178,9 +193,11 @@ /** * Builds a partition from a given mapping global_index -> part_id. + * * @param exec the Executor on which the partition should be built * @param mapping the mapping from global indices to part IDs. * @param num_parts the number of parts used in the mapping. + * * @return a Partition representing the given mapping as a set of ranges */ static std::unique_ptr build_from_mapping( @@ -189,9 +206,11 @@ /** * Builds a partition consisting of contiguous ranges, one for each part. + * * @param exec the Executor on which the partition should be built * @param ranges the boundaries of the ranges representing each part. Part i contains the indices [ranges[i], ranges[i + 1]). + * @return a Partition representing the given contiguous partitioning. */ static std::unique_ptr build_from_contiguous( @@ -200,14 +219,16 @@ /** * Builds a partition by evenly distributing the global range. + * * @param exec the Executor on which the partition should be built * @param num_parts the number of parts used in this partition * @param global_size the global size of this partition + * * @return a Partition where each range has either * `floor(global_size/num_parts)` or `floor(global_size/num_parts) + 1` * indices. 
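+ * For example, num_parts = 5 and global_size = 3 yield the part sizes + * 1, 1, 1, 0, 0, i.e. two empty parts.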
*/ - static std::unique_ptr build_from_global_size( + static std::unique_ptr build_from_global_size_uniform( std::shared_ptr exec, comm_index_type num_parts, global_index_type global_size); @@ -219,6 +240,7 @@ class Partition : public EnablePolymorphicObject>, comm_index_type num_parts = 0, size_type num_ranges = 0) : EnablePolymorphicObject{exec}, num_parts_{num_parts}, + num_empty_parts_{0}, offsets_{exec, num_ranges + 1}, starting_indices_{exec, num_ranges}, part_sizes_{exec, static_cast(num_parts)}, @@ -232,6 +254,7 @@ class Partition : public EnablePolymorphicObject>, private: comm_index_type num_parts_; + comm_index_type num_empty_parts_; Array offsets_; Array starting_indices_; Array part_sizes_; diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index 32e5ecf569e..a393f80cb8d 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -49,8 +49,8 @@ template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, - int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) { std::fill_n(sizes, num_parts, 0); auto num_threads = static_cast(omp_get_max_threads()); @@ -72,7 +72,7 @@ void build_starting_indices(std::shared_ptr exec, } #pragma omp barrier // exclusive prefix sum over local sizes -#pragma omp for +#pragma omp for reduction(+ : num_empty_parts) for (comm_index_type part = 0; part < num_parts; ++part) { LocalIndexType size{}; for (size_type thread = 0; thread < num_threads; ++thread) { @@ -82,6 +82,7 @@ void build_starting_indices(std::shared_ptr exec, size += local_size; } sizes[part] = size; + num_empty_parts += size == 0 ? 1 : 0; } // add global baselines to local ranks for (auto range = thread_begin; range < thread_end; range++) { @@ -95,12 +96,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); - } // namespace partition } // namespace omp } // namespace kernels diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index 30d801a186e..35590c60df1 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,7 +2,6 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) -add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(multigrid) diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt deleted file mode 100644 index 78a626512af..00000000000 --- a/omp/test/distributed/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -ginkgo_create_test(partition_kernels) diff --git a/omp/test/distributed/partition_kernels.cpp b/omp/test/distributed/partition_kernels.cpp deleted file mode 100644 index 0e04bc9b88c..00000000000 --- a/omp/test/distributed/partition_kernels.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. 
Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include -#include -#include - - -#include -#include - - -#include - - -#include "core/distributed/partition_kernels.hpp" -#include "core/test/utils.hpp" - - -namespace { - - -using global_index_type = gko::distributed::global_index_type; -using comm_index_type = gko::distributed::comm_index_type; - - -template -class Partition : public ::testing::Test { -protected: - using local_index_type = LocalIndexType; - Partition() : ref(gko::ReferenceExecutor::create()) {} - - std::shared_ptr ref; -}; - -TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); - - -TYPED_TEST(Partition, BuildsFromMapping) -{ - using local_index_type = typename TestFixture::local_index_type; - gko::Array mapping{ - this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; - comm_index_type num_parts = 3; - gko::size_type num_ranges = 10; - - auto partition = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - - EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); - EXPECT_EQ(partition->get_num_ranges(), num_ranges); - EXPECT_EQ(partition->get_num_parts(), num_parts); - EXPECT_EQ(partition->get_const_range_bounds(), - partition->get_range_bounds()); - EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 2); - EXPECT_EQ(partition->get_const_range_bounds()[2], 3); - EXPECT_EQ(partition->get_const_range_bounds()[3], 5); - EXPECT_EQ(partition->get_const_range_bounds()[4], 6); - EXPECT_EQ(partition->get_const_range_bounds()[5], 8); - EXPECT_EQ(partition->get_const_range_bounds()[6], 9); - EXPECT_EQ(partition->get_const_range_bounds()[7], 10); - EXPECT_EQ(partition->get_const_range_bounds()[8], 13); - EXPECT_EQ(partition->get_const_range_bounds()[9], 15); - EXPECT_EQ(partition->get_const_range_bounds()[10], 16); - EXPECT_EQ(partition->get_part_ids()[0], 2); - EXPECT_EQ(partition->get_part_ids()[1], 0); - EXPECT_EQ(partition->get_part_ids()[2], 1); - EXPECT_EQ(partition->get_part_ids()[3], 2); - EXPECT_EQ(partition->get_part_ids()[4], 0); - EXPECT_EQ(partition->get_part_ids()[5], 1); - 
EXPECT_EQ(partition->get_part_ids()[6], 0); - EXPECT_EQ(partition->get_part_ids()[7], 1); - EXPECT_EQ(partition->get_part_ids()[8], 2); - EXPECT_EQ(partition->get_part_ids()[9], 0); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 2); - EXPECT_EQ(partition->get_range_starting_indices()[4], 1); - EXPECT_EQ(partition->get_range_starting_indices()[5], 2); - EXPECT_EQ(partition->get_range_starting_indices()[6], 3); - EXPECT_EQ(partition->get_range_starting_indices()[7], 3); - EXPECT_EQ(partition->get_range_starting_indices()[8], 3); - EXPECT_EQ(partition->get_range_starting_indices()[9], 4); - EXPECT_EQ(partition->get_part_sizes()[0], 5); - EXPECT_EQ(partition->get_part_sizes()[1], 6); - EXPECT_EQ(partition->get_part_sizes()[2], 5); -} - - -TYPED_TEST(Partition, BuildsFromRanges) -{ - using local_index_type = typename TestFixture::local_index_type; - gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; - - auto partition = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - - EXPECT_EQ(partition->get_size(), - ranges.get_const_data()[ranges.get_num_elems() - 1]); - EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); - EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); - EXPECT_EQ(partition->get_const_range_bounds(), - partition->get_range_bounds()); - EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 5); - EXPECT_EQ(partition->get_const_range_bounds()[2], 5); - EXPECT_EQ(partition->get_const_range_bounds()[3], 7); - EXPECT_EQ(partition->get_const_range_bounds()[4], 9); - EXPECT_EQ(partition->get_const_range_bounds()[5], 10); - EXPECT_EQ(partition->get_part_ids()[0], 0); - EXPECT_EQ(partition->get_part_ids()[1], 1); - EXPECT_EQ(partition->get_part_ids()[2], 2); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 0); - EXPECT_EQ(partition->get_range_starting_indices()[4], 0); - EXPECT_EQ(partition->get_part_sizes()[0], 5); - EXPECT_EQ(partition->get_part_sizes()[1], 0); - EXPECT_EQ(partition->get_part_sizes()[2], 2); - EXPECT_EQ(partition->get_part_sizes()[3], 2); - EXPECT_EQ(partition->get_part_sizes()[4], 1); -} - - -TYPED_TEST(Partition, IsConnected) -{ - using local_index_type = typename TestFixture::local_index_type; - auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}}, - 3)); - - ASSERT_TRUE(part->is_connected()); -} - - -TYPED_TEST(Partition, IsConnectedUnordered) -{ - using local_index_type = typename TestFixture::local_index_type; - auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, - 3)); - - ASSERT_TRUE(part->is_connected()); -} - - -TYPED_TEST(Partition, IsConnectedFail) -{ - using local_index_type = typename TestFixture::local_index_type; - auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}}, - 3)); - - 
ASSERT_FALSE(part->is_connected()); -} - - -TYPED_TEST(Partition, IsOrdered) -{ - using local_index_type = typename TestFixture::local_index_type; - auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, - 3)); - - ASSERT_FALSE(part->is_ordered()); -} - - -TYPED_TEST(Partition, IsOrderedFail) -{ - using local_index_type = typename TestFixture::local_index_type; - auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}}, - 3)); - - ASSERT_TRUE(part->is_ordered()); -} - - -} // namespace diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index d2fbcf433ee..f160fb87447 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -118,8 +118,8 @@ template void build_starting_indices(std::shared_ptr exec, const global_index_type* range_offsets, const int* range_parts, size_type num_ranges, - int num_parts, LocalIndexType* ranks, - LocalIndexType* sizes) + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) { std::fill_n(sizes, num_parts, 0); for (size_type range = 0; range < num_ranges; ++range) { @@ -130,6 +130,7 @@ void build_starting_indices(std::shared_ptr exec, ranks[range] = rank; sizes[part] += end - begin; } + num_empty_parts = std::count(sizes, sizes + num_parts, 0); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index ed80e5ff0a4..6f4bd548f4d 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -123,6 +123,61 @@ TYPED_TEST(Partition, BuildsFromMapping) } +TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + gko::Array mapping{ + this->ref, {3, 3, 0, 1, 1, 3, 0, 0, 1, 0, 1, 1, 1, 3, 3, 0}}; + comm_index_type num_parts = 5; + gko::size_type num_ranges = 10; + + auto partition = + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); + + EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); + EXPECT_EQ(partition->get_num_ranges(), num_ranges); + EXPECT_EQ(partition->get_num_parts(), num_parts); + EXPECT_EQ(partition->get_num_empty_parts(), 2); + EXPECT_EQ(partition->get_const_range_bounds()[0], 0); + EXPECT_EQ(partition->get_const_range_bounds()[1], 2); + EXPECT_EQ(partition->get_const_range_bounds()[2], 3); + EXPECT_EQ(partition->get_const_range_bounds()[3], 5); + EXPECT_EQ(partition->get_const_range_bounds()[4], 6); + EXPECT_EQ(partition->get_const_range_bounds()[5], 8); + EXPECT_EQ(partition->get_const_range_bounds()[6], 9); + EXPECT_EQ(partition->get_const_range_bounds()[7], 10); + EXPECT_EQ(partition->get_const_range_bounds()[8], 13); + EXPECT_EQ(partition->get_const_range_bounds()[9], 15); + EXPECT_EQ(partition->get_const_range_bounds()[10], 16); + EXPECT_EQ(partition->get_part_ids()[0], 3); + EXPECT_EQ(partition->get_part_ids()[1], 0); + EXPECT_EQ(partition->get_part_ids()[2], 1); + EXPECT_EQ(partition->get_part_ids()[3], 3); + EXPECT_EQ(partition->get_part_ids()[4], 0); + EXPECT_EQ(partition->get_part_ids()[5], 1); + EXPECT_EQ(partition->get_part_ids()[6], 0); + EXPECT_EQ(partition->get_part_ids()[7], 1); + EXPECT_EQ(partition->get_part_ids()[8], 3); + EXPECT_EQ(partition->get_part_ids()[9], 0); + 
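// starting indices are part-local: each range begins at the number of indices its part already owns in earlier ranges +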
EXPECT_EQ(partition->get_range_starting_indices()[0], 0); + EXPECT_EQ(partition->get_range_starting_indices()[1], 0); + EXPECT_EQ(partition->get_range_starting_indices()[2], 0); + EXPECT_EQ(partition->get_range_starting_indices()[3], 2); + EXPECT_EQ(partition->get_range_starting_indices()[4], 1); + EXPECT_EQ(partition->get_range_starting_indices()[5], 2); + EXPECT_EQ(partition->get_range_starting_indices()[6], 3); + EXPECT_EQ(partition->get_range_starting_indices()[7], 3); + EXPECT_EQ(partition->get_range_starting_indices()[8], 3); + EXPECT_EQ(partition->get_range_starting_indices()[9], 4); + EXPECT_EQ(partition->get_part_sizes()[0], 5); + EXPECT_EQ(partition->get_part_sizes()[1], 6); + EXPECT_EQ(partition->get_part_sizes()[2], 0); + EXPECT_EQ(partition->get_part_sizes()[3], 5); + EXPECT_EQ(partition->get_part_sizes()[4], 0); +} + + TYPED_TEST(Partition, BuildsFromRanges) { using local_index_type = typename TestFixture::local_index_type; @@ -166,9 +221,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) { using local_index_type = typename TestFixture::local_index_type; - auto partition = - gko::distributed::Partition::build_from_global_size( - this->ref, 5, 13); + auto partition = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, 5, 13); EXPECT_EQ(partition->get_size(), 13); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -201,9 +255,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) { using local_index_type = typename TestFixture::local_index_type; - auto partition = - gko::distributed::Partition::build_from_global_size( - this->ref, 5, 0); + auto partition = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, 5, 0); EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -236,9 +289,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) { using local_index_type = typename TestFixture::local_index_type; - auto partition = - gko::distributed::Partition::build_from_global_size( - this->ref, 5, 3); + auto partition = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, 5, 3); EXPECT_EQ(partition->get_size(), 3); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -279,6 +331,18 @@ TYPED_TEST(Partition, IsConnected) } +TYPED_TEST(Partition, IsConnectedWithEmptyParts) +{ + using local_index_type = typename TestFixture::local_index_type; + auto part = gko::share( + gko::distributed::Partition::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 0, 2, 2, 5}}, + 6)); + + ASSERT_TRUE(part->is_connected()); +} + + TYPED_TEST(Partition, IsConnectedUnordered) { using local_index_type = typename TestFixture::local_index_type; diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 9a9f6283fd1..04cd88b5ae0 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -83,6 +83,7 @@ class Partition : public ::testing::Test { ASSERT_EQ(part->get_size(), dpart->get_size()); ASSERT_EQ(part->get_num_ranges(), dpart->get_num_ranges()); ASSERT_EQ(part->get_num_parts(), dpart->get_num_parts()); + ASSERT_EQ(part->get_num_empty_parts(), dpart->get_num_empty_parts()); GKO_ASSERT_ARRAY_EQ( gko::make_array_view(this->ref, part->get_num_ranges() + 1, part->get_range_bounds()), @@ -317,12 +318,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) const int num_parts = 7; const global_index_type global_size = 708; - auto part = - gko::distributed::Partition::build_from_global_size( - 
this->ref, num_parts, global_size); - auto dpart = - gko::distributed::Partition::build_from_global_size( - this->exec, num_parts, global_size); + auto part = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->exec, num_parts, + global_size); this->assert_equal(part, dpart); } @@ -334,12 +335,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) const int num_parts = 7; const global_index_type global_size = 0; - auto part = - gko::distributed::Partition::build_from_global_size( - this->ref, num_parts, global_size); - auto dpart = - gko::distributed::Partition::build_from_global_size( - this->exec, num_parts, global_size); + auto part = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->exec, num_parts, + global_size); this->assert_equal(part, dpart); } @@ -351,14 +352,73 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) const int num_parts = 77; const global_index_type global_size = 13; + auto part = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = gko::distributed::Partition< + local_index_type>::build_from_global_size_uniform(this->exec, num_parts, + global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, IsOrderedTrue) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + gko::size_type size_per_part = 1000; + gko::size_type global_size = num_parts * size_per_part; + gko::Array mapping{this->ref, global_size}; + for (comm_index_type i = 0; i < num_parts; ++i) { + std::fill(mapping.get_data() + i * size_per_part, + mapping.get_data() + (i + 1) * size_per_part, i); + } + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, mapping, num_parts); + + ASSERT_TRUE(dpart->is_ordered()); +} + + +TYPED_TEST(Partition, IsOrderedFail) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + gko::size_type size_per_part = 1000; + gko::size_type global_size = num_parts * size_per_part; + gko::Array mapping{this->ref, global_size}; + for (comm_index_type i = 0; i < num_parts; ++i) { + std::fill(mapping.get_data() + i * size_per_part, + mapping.get_data() + (i + 1) * size_per_part, + num_parts - 1 - i); + } + auto dpart = + gko::distributed::Partition::build_from_mapping( + this->exec, mapping, num_parts); + + ASSERT_FALSE(dpart->is_ordered()); +} + + +TYPED_TEST(Partition, IsOrderedRandom) +{ + using local_index_type = typename TestFixture::local_index_type; + comm_index_type num_parts = 7; + std::uniform_int_distribution part_dist{0, num_parts - 1}; + gko::Array mapping{this->ref, 10000}; + for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { + mapping.get_data()[i] = part_dist(this->rand_engine); + } auto part = - gko::distributed::Partition::build_from_global_size( - this->ref, num_parts, global_size); + gko::distributed::Partition::build_from_mapping( + this->ref, mapping, num_parts); auto dpart = - gko::distributed::Partition::build_from_global_size( - this->exec, num_parts, global_size); + gko::distributed::Partition::build_from_mapping( + this->exec, mapping, num_parts); - this->assert_equal(part, dpart); + 
ASSERT_EQ(part->is_ordered(), dpart->is_ordered()); } From eb9f52f02135fbc5fab4cf34c9ef9acad048bf8c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 28 Oct 2021 13:07:35 +0200 Subject: [PATCH 53/59] allow only const access to partition data --- .../unified/distributed/partition_kernels.cpp | 30 ++---- core/device_hooks/common_kernels.inc.cpp | 6 +- core/distributed/partition.cpp | 10 +- core/distributed/partition_kernels.hpp | 36 +++---- include/ginkgo/core/distributed/partition.hpp | 19 +--- reference/distributed/partition_kernels.cpp | 41 +++---- .../test/distributed/partition_kernels.cpp | 100 ++++++++---------- test/distributed/partition_kernels.cpp | 20 ++-- 8 files changed, 115 insertions(+), 147 deletions(-) diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 11cefbfac13..4318c0e7e19 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -62,10 +62,10 @@ void count_ranges(std::shared_ptr exec, } -template void build_from_contiguous(std::shared_ptr exec, const Array& ranges, - distributed::Partition* partition) + global_index_type* range_bounds, + comm_index_type* part_ids) { run_kernel( exec, @@ -76,18 +76,14 @@ void build_from_contiguous(std::shared_ptr exec, bounds[i + 1] = ranges[i + 1]; ids[i] = i; }, - ranges.get_num_elems() - 1, ranges, partition->get_range_bounds(), - partition->get_part_ids()); + ranges.get_num_elems() - 1, ranges, range_bounds, part_ids); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); - -template void build_from_mapping(std::shared_ptr exec, const Array& mapping, - distributed::Partition* partition) + global_index_type* range_bounds, + comm_index_type* part_ids) { Array range_index_ranks{exec, mapping.get_num_elems() + 1}; run_kernel( @@ -115,17 +111,14 @@ void build_from_mapping(std::shared_ptr exec, } }, mapping.get_num_elems() + 1, mapping.get_num_elems(), mapping, - range_index_ranks, partition->get_range_bounds(), - partition->get_part_ids()); + range_index_ranks, range_bounds, part_ids); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); - -template void build_ranges_from_global_size(std::shared_ptr exec, - int num_parts, int64 global_size, - Array& ranges) + comm_index_type num_parts, + global_index_type global_size, + Array& ranges) { const auto size_per_part = global_size / num_parts; const auto rest = global_size - (num_parts * size_per_part); @@ -138,16 +131,13 @@ void build_ranges_from_global_size(std::shared_ptr exec, components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems()); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); - template void is_ordered(std::shared_ptr exec, const distributed::Partition* partition, bool* result) { - const auto part_ids = partition->get_const_part_ids(); + const auto part_ids = partition->get_part_ids(); const auto num_ranges = partition->get_num_ranges(); // it is necessary to use uint32 as a temporary result, since // bool can't be used with suffles diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index a0a8893c952..461f9a48067 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -215,10 +215,10 @@ namespace partition { GKO_STUB(GKO_PARTITION_COUNT_RANGES); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); 
-GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); +GKO_STUB(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); +GKO_STUB(GKO_PARTITION_BUILD_FROM_MAPPING); +GKO_STUB(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 0328b225db3..e5804ec31f1 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -64,8 +64,9 @@ Partition::build_from_mapping( size_type num_ranges{}; exec->run(partition::make_count_ranges(*local_mapping.get(), num_ranges)); auto result = Partition::create(exec, num_parts, num_ranges); - exec->run( - partition::make_build_from_mapping(*local_mapping.get(), result.get())); + exec->run(partition::make_build_from_mapping(*local_mapping.get(), + result->offsets_.get_data(), + result->part_ids_.get_data())); result->compute_range_starting_indices(); return result; } @@ -81,8 +82,9 @@ Partition::build_from_contiguous( auto result = Partition::create( exec, static_cast(ranges.get_num_elems() - 1), ranges.get_num_elems() - 1); - exec->run(partition::make_build_from_contiguous(*local_ranges.get(), - result.get())); + exec->run(partition::make_build_from_contiguous( + *local_ranges.get(), result->offsets_.get_data(), + result->part_ids_.get_data())); result->compute_range_starting_indices(); return result; } diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 223991ad878..6de6bfd723f 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -49,27 +49,30 @@ namespace kernels { const Array& mapping, \ size_type& num_ranges) -#define GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType) \ - void build_from_contiguous( \ - std::shared_ptr exec, \ - const Array& ranges, \ - distributed::Partition* partition) +#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS \ + void build_from_contiguous(std::shared_ptr exec, \ + const Array& ranges, \ + global_index_type* range_bounds, \ + comm_index_type* part_ids) -#define GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType) \ +#define GKO_PARTITION_BUILD_FROM_MAPPING \ void build_from_mapping(std::shared_ptr exec, \ const Array& mapping, \ - distributed::Partition* partition) + global_index_type* range_bounds, \ + comm_index_type* part_ids) -#define GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE(LocalIndexType) \ - void build_ranges_from_global_size( \ - std::shared_ptr exec, int num_parts, \ - int64 global_size, Array& ranges) +#define GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE \ + void build_ranges_from_global_size( \ + std::shared_ptr exec, \ + comm_index_type num_parts, global_index_type global_size, \ + Array& ranges) #define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType) \ void build_starting_indices(std::shared_ptr exec, \ const global_index_type* range_offsets, \ const int* range_parts, size_type num_ranges, \ - int num_parts, int& num_empty_parts, \ + comm_index_type num_parts, \ + comm_index_type& num_empty_parts, \ LocalIndexType* ranks, LocalIndexType* sizes) #define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ @@ -82,12 +85,9 @@ namespace kernels { using global_index_type = distributed::global_index_type; \ using comm_index_type = distributed::comm_index_type; \ GKO_PARTITION_COUNT_RANGES; \ - template \ - 
GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS(LocalIndexType); \ - template \ - GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING(LocalIndexType); \ - template \ - GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE(LocalIndexType); \ + GKO_PARTITION_BUILD_FROM_CONTIGUOUS; \ + GKO_PARTITION_BUILD_FROM_MAPPING; \ + GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE; \ template \ GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType); \ template \ diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 0c7b5ad3530..b17b312e9e0 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -108,34 +108,21 @@ class Partition : public EnablePolymorphicObject>, * `range_bounds[i]` is the beginning (inclusive) and * `range_bounds[i + 1]` is the end (exclusive) of the ith range. */ - const global_index_type* get_const_range_bounds() const noexcept + const global_index_type* get_range_bounds() const noexcept { return offsets_.get_const_data(); } - /** - * @copydoc get_const_range_bounds() - */ - global_index_type* get_range_bounds() noexcept - { - return offsets_.get_data(); - } - /** * Returns the part ID array stored by this partition. * For each range from get_range_bounds(), it stores the part ID in the * range [0, get_num_parts() - 1]. */ - const comm_index_type* get_const_part_ids() const noexcept + const comm_index_type* get_part_ids() const noexcept { return part_ids_.get_const_data(); } - /** - * @copydoc get_const_part_ids() - */ - comm_index_type* get_part_ids() { return part_ids_.get_data(); } - /** * Compute the range_starting_indices and part_sizes based on the current * range_bounds and part_ids. @@ -232,6 +219,7 @@ class Partition : public EnablePolymorphicObject>, std::shared_ptr exec, comm_index_type num_parts, global_index_type global_size); +private: /** * Creates a partition stored on the given executor with the given number of * consecutive ranges and parts. 
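With `create` moved behind `private:` above, the static `build_from_*` functions are the only remaining way for users to obtain a partition. A usage sketch under the single-template-parameter interface at this point in the series (executor construction elided, values purely illustrative):

```cpp
#include <ginkgo/ginkgo.hpp>

void partition_examples(std::shared_ptr<const gko::Executor> exec)
{
    using part_type = gko::distributed::Partition<gko::int32>;
    // part i owns the contiguous global index range [ranges[i], ranges[i + 1])
    gko::Array<gko::int64> ranges{exec, {0, 4, 7, 10}};
    auto contiguous = part_type::build_from_contiguous(exec, ranges);
    // mapping[j] is the id of the part owning global index j
    gko::Array<gko::distributed::comm_index_type> mapping{exec,
                                                          {0, 0, 1, 1, 2}};
    auto mapped = part_type::build_from_mapping(exec, mapping, 3);
    // 13 indices split uniformly over 5 parts, giving sizes 3, 3, 3, 2, 2
    auto uniform = part_type::build_from_global_size_uniform(exec, 5, 13);
}
```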
@@ -252,7 +240,6 @@ class Partition : public EnablePolymorphicObject>, part_ids_.fill(0); } -private: comm_index_type num_parts_; comm_index_type num_empty_parts_; Array offsets_; diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index f160fb87447..9b39ad6f974 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -52,51 +52,45 @@ void count_ranges(std::shared_ptr exec, } -template void build_from_contiguous(std::shared_ptr exec, const Array& ranges, - distributed::Partition* partition) + global_index_type* range_bounds, + comm_index_type* part_ids) { - partition->get_range_bounds()[0] = 0; + range_bounds[0] = 0; for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) { - auto begin = ranges.get_const_data()[i]; auto end = ranges.get_const_data()[i + 1]; - partition->get_range_bounds()[i + 1] = end; - partition->get_part_ids()[i] = i; + range_bounds[i + 1] = end; + part_ids[i] = i; } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_CONTIGUOUS); - -template void build_from_mapping(std::shared_ptr exec, const Array& mapping, - distributed::Partition* partition) + global_index_type* range_bounds, + comm_index_type* part_ids) { size_type range_idx{}; comm_index_type range_part{-1}; for (size_type i = 0; i < mapping.get_num_elems(); i++) { auto cur_part = mapping.get_const_data()[i]; if (cur_part != range_part) { - partition->get_range_bounds()[range_idx] = i; - partition->get_part_ids()[range_idx] = cur_part; + range_bounds[range_idx] = i; + part_ids[range_idx] = cur_part; range_idx++; range_part = cur_part; } } - partition->get_range_bounds()[range_idx] = + range_bounds[range_idx] = static_cast(mapping.get_num_elems()); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_FROM_MAPPING); - -template void build_ranges_from_global_size(std::shared_ptr exec, - int num_parts, int64 global_size, - Array& ranges) + comm_index_type num_parts, + global_index_type global_size, + Array& ranges) { const auto size_per_part = global_size / num_parts; const auto rest = global_size - (num_parts * size_per_part); @@ -105,14 +99,11 @@ void build_ranges_from_global_size(std::shared_ptr exec, ranges_ptr[0] = 0; for (int i = 1; i < num_parts + 1; ++i) { - ranges_ptr[i] = ranges_ptr[i - 1] + size_per_part + - static_cast((i - 1) < rest); + ranges_ptr[i] = + ranges_ptr[i - 1] + size_per_part + ((i - 1) < rest ? 
1 : 0); } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_FROM_GLOBAL_SIZE); - template void build_starting_indices(std::shared_ptr exec, @@ -142,7 +133,7 @@ void is_ordered(std::shared_ptr exec, bool* result) { *result = true; - auto part_ids = partition->get_const_part_ids(); + auto part_ids = partition->get_part_ids(); for (comm_index_type i = 1; i < partition->get_num_ranges(); ++i) { if (part_ids[i] < part_ids[i - 1]) { diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 6f4bd548f4d..d2048af7eef 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -83,20 +83,17 @@ TYPED_TEST(Partition, BuildsFromMapping) EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); EXPECT_EQ(partition->get_num_parts(), num_parts); - EXPECT_EQ(partition->get_const_range_bounds(), - partition->get_range_bounds()); - EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 2); - EXPECT_EQ(partition->get_const_range_bounds()[2], 3); - EXPECT_EQ(partition->get_const_range_bounds()[3], 5); - EXPECT_EQ(partition->get_const_range_bounds()[4], 6); - EXPECT_EQ(partition->get_const_range_bounds()[5], 8); - EXPECT_EQ(partition->get_const_range_bounds()[6], 9); - EXPECT_EQ(partition->get_const_range_bounds()[7], 10); - EXPECT_EQ(partition->get_const_range_bounds()[8], 13); - EXPECT_EQ(partition->get_const_range_bounds()[9], 15); - EXPECT_EQ(partition->get_const_range_bounds()[10], 16); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 2); + EXPECT_EQ(partition->get_range_bounds()[2], 3); + EXPECT_EQ(partition->get_range_bounds()[3], 5); + EXPECT_EQ(partition->get_range_bounds()[4], 6); + EXPECT_EQ(partition->get_range_bounds()[5], 8); + EXPECT_EQ(partition->get_range_bounds()[6], 9); + EXPECT_EQ(partition->get_range_bounds()[7], 10); + EXPECT_EQ(partition->get_range_bounds()[8], 13); + EXPECT_EQ(partition->get_range_bounds()[9], 15); + EXPECT_EQ(partition->get_range_bounds()[10], 16); EXPECT_EQ(partition->get_part_ids()[0], 2); EXPECT_EQ(partition->get_part_ids()[1], 0); EXPECT_EQ(partition->get_part_ids()[2], 1); @@ -139,17 +136,17 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) EXPECT_EQ(partition->get_num_ranges(), num_ranges); EXPECT_EQ(partition->get_num_parts(), num_parts); EXPECT_EQ(partition->get_num_empty_parts(), 2); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 2); - EXPECT_EQ(partition->get_const_range_bounds()[2], 3); - EXPECT_EQ(partition->get_const_range_bounds()[3], 5); - EXPECT_EQ(partition->get_const_range_bounds()[4], 6); - EXPECT_EQ(partition->get_const_range_bounds()[5], 8); - EXPECT_EQ(partition->get_const_range_bounds()[6], 9); - EXPECT_EQ(partition->get_const_range_bounds()[7], 10); - EXPECT_EQ(partition->get_const_range_bounds()[8], 13); - EXPECT_EQ(partition->get_const_range_bounds()[9], 15); - EXPECT_EQ(partition->get_const_range_bounds()[10], 16); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 2); + EXPECT_EQ(partition->get_range_bounds()[2], 3); + EXPECT_EQ(partition->get_range_bounds()[3], 5); + EXPECT_EQ(partition->get_range_bounds()[4], 6); + EXPECT_EQ(partition->get_range_bounds()[5], 8); + 
EXPECT_EQ(partition->get_range_bounds()[6], 9); + EXPECT_EQ(partition->get_range_bounds()[7], 10); + EXPECT_EQ(partition->get_range_bounds()[8], 13); + EXPECT_EQ(partition->get_range_bounds()[9], 15); + EXPECT_EQ(partition->get_range_bounds()[10], 16); EXPECT_EQ(partition->get_part_ids()[0], 3); EXPECT_EQ(partition->get_part_ids()[1], 0); EXPECT_EQ(partition->get_part_ids()[2], 1); @@ -188,18 +185,15 @@ TYPED_TEST(Partition, BuildsFromRanges) this->ref, ranges); EXPECT_EQ(partition->get_size(), - ranges.get_const_data()[ranges.get_num_elems() - 1]); + ranges.get_data()[ranges.get_num_elems() - 1]); EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); - EXPECT_EQ(partition->get_const_range_bounds(), - partition->get_range_bounds()); - EXPECT_EQ(partition->get_const_part_ids(), partition->get_part_ids()); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 5); - EXPECT_EQ(partition->get_const_range_bounds()[2], 5); - EXPECT_EQ(partition->get_const_range_bounds()[3], 7); - EXPECT_EQ(partition->get_const_range_bounds()[4], 9); - EXPECT_EQ(partition->get_const_range_bounds()[5], 10); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 5); + EXPECT_EQ(partition->get_range_bounds()[2], 5); + EXPECT_EQ(partition->get_range_bounds()[3], 7); + EXPECT_EQ(partition->get_range_bounds()[4], 9); + EXPECT_EQ(partition->get_range_bounds()[5], 10); EXPECT_EQ(partition->get_part_ids()[0], 0); EXPECT_EQ(partition->get_part_ids()[1], 1); EXPECT_EQ(partition->get_part_ids()[2], 2); @@ -227,12 +221,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) EXPECT_EQ(partition->get_size(), 13); EXPECT_EQ(partition->get_num_ranges(), 5); EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 3); - EXPECT_EQ(partition->get_const_range_bounds()[2], 6); - EXPECT_EQ(partition->get_const_range_bounds()[3], 9); - EXPECT_EQ(partition->get_const_range_bounds()[4], 11); - EXPECT_EQ(partition->get_const_range_bounds()[5], 13); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 3); + EXPECT_EQ(partition->get_range_bounds()[2], 6); + EXPECT_EQ(partition->get_range_bounds()[3], 9); + EXPECT_EQ(partition->get_range_bounds()[4], 11); + EXPECT_EQ(partition->get_range_bounds()[5], 13); EXPECT_EQ(partition->get_part_ids()[0], 0); EXPECT_EQ(partition->get_part_ids()[1], 1); EXPECT_EQ(partition->get_part_ids()[2], 2); @@ -261,12 +255,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 5); EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 0); - EXPECT_EQ(partition->get_const_range_bounds()[2], 0); - EXPECT_EQ(partition->get_const_range_bounds()[3], 0); - EXPECT_EQ(partition->get_const_range_bounds()[4], 0); - EXPECT_EQ(partition->get_const_range_bounds()[5], 0); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 0); + EXPECT_EQ(partition->get_range_bounds()[2], 0); + EXPECT_EQ(partition->get_range_bounds()[3], 0); + EXPECT_EQ(partition->get_range_bounds()[4], 0); + EXPECT_EQ(partition->get_range_bounds()[5], 0); EXPECT_EQ(partition->get_part_ids()[0], 0); EXPECT_EQ(partition->get_part_ids()[1], 1); 
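All expected bounds in the `BuildsFromGlobalSize*` tests follow the uniform splitting rule used by `build_ranges_from_global_size`: every part receives `global_size / num_parts` indices and the first `global_size % num_parts` parts receive one extra. A standalone check of that arithmetic for the `global_size = 13`, `num_parts = 5` case above (plain C++, independent of the fixture):

```cpp
#include <cassert>
#include <vector>

int main()
{
    const long long global_size = 13;
    const long long num_parts = 5;
    const long long size_per_part = global_size / num_parts;         // 2
    const long long rest = global_size - num_parts * size_per_part;  // 3
    std::vector<long long> bounds{0};
    for (long long i = 0; i < num_parts; ++i) {
        // the first `rest` parts are one index larger than the others
        bounds.push_back(bounds.back() + size_per_part + (i < rest ? 1 : 0));
    }
    assert((bounds == std::vector<long long>{0, 3, 6, 9, 11, 13}));
}
```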
EXPECT_EQ(partition->get_part_ids()[2], 2); @@ -295,12 +289,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) EXPECT_EQ(partition->get_size(), 3); EXPECT_EQ(partition->get_num_ranges(), 5); EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_const_range_bounds()[0], 0); - EXPECT_EQ(partition->get_const_range_bounds()[1], 1); - EXPECT_EQ(partition->get_const_range_bounds()[2], 2); - EXPECT_EQ(partition->get_const_range_bounds()[3], 3); - EXPECT_EQ(partition->get_const_range_bounds()[4], 3); - EXPECT_EQ(partition->get_const_range_bounds()[5], 3); + EXPECT_EQ(partition->get_range_bounds()[0], 0); + EXPECT_EQ(partition->get_range_bounds()[1], 1); + EXPECT_EQ(partition->get_range_bounds()[2], 2); + EXPECT_EQ(partition->get_range_bounds()[3], 3); + EXPECT_EQ(partition->get_range_bounds()[4], 3); + EXPECT_EQ(partition->get_range_bounds()[5], 3); EXPECT_EQ(partition->get_part_ids()[0], 0); EXPECT_EQ(partition->get_part_ids()[1], 1); EXPECT_EQ(partition->get_part_ids()[2], 2); diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 04cd88b5ae0..011b989884f 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -85,15 +85,19 @@ class Partition : public ::testing::Test { ASSERT_EQ(part->get_num_parts(), dpart->get_num_parts()); ASSERT_EQ(part->get_num_empty_parts(), dpart->get_num_empty_parts()); GKO_ASSERT_ARRAY_EQ( - gko::make_array_view(this->ref, part->get_num_ranges() + 1, - part->get_range_bounds()), - gko::make_array_view(this->exec, dpart->get_num_ranges() + 1, - dpart->get_range_bounds())); + gko::make_array_view( + this->ref, part->get_num_ranges() + 1, + const_cast(part->get_range_bounds())), + gko::make_array_view( + this->exec, dpart->get_num_ranges() + 1, + const_cast(dpart->get_range_bounds()))); GKO_ASSERT_ARRAY_EQ( - gko::make_array_view(this->ref, part->get_num_ranges(), - part->get_part_ids()), - gko::make_array_view(this->exec, dpart->get_num_ranges(), - dpart->get_part_ids())); + gko::make_array_view( + this->ref, part->get_num_ranges(), + const_cast(part->get_part_ids())), + gko::make_array_view( + this->exec, dpart->get_num_ranges(), + const_cast(dpart->get_part_ids()))); GKO_ASSERT_ARRAY_EQ( gko::make_array_view(this->ref, part->get_num_ranges(), const_cast( From b55e2673bc999918508e8c40eaac612439f94501 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 28 Oct 2021 15:03:39 +0200 Subject: [PATCH 54/59] review update - clarify documentation - renaming - small kernel fix - simplify tests Co-authored-by: Pratik Nayak --- core/distributed/partition.cpp | 6 +- cuda/distributed/partition_kernels.cu | 4 +- dpcpp/distributed/partition_kernels.dp.cpp | 4 +- hip/distributed/partition_kernels.hip.cpp | 4 +- include/ginkgo/core/distributed/partition.hpp | 60 +++-- .../test/distributed/partition_kernels.cpp | 211 +++++------------- test/distributed/partition_kernels.cpp | 6 +- 7 files changed, 107 insertions(+), 188 deletions(-) diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index e5804ec31f1..1608fe21ecc 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -115,16 +115,16 @@ void Partition::compute_range_starting_indices() template -bool Partition::is_connected() +bool Partition::has_connected_parts() { return get_num_parts() - get_num_empty_parts() == get_num_ranges(); } template -bool Partition::is_ordered() +bool Partition::has_ordered_parts() { - if (is_connected()) { + if (has_connected_parts()) { auto 
exec = this->get_executor(); bool is_ordered; exec->run(partition::make_is_ordered(this, &is_ordered)); diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index d6827f32540..8127ba897ce 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -61,7 +61,7 @@ void build_starting_indices(std::shared_ptr exec, Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; + Array permutation{exec, num_ranges}; // set sizes to 0 in case of empty parts components::fill_array(exec, sizes, num_parts, LocalIndexType{}); @@ -76,7 +76,7 @@ void build_starting_indices(std::shared_ptr exec, } range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); + permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, range_sizes, tmp_part_ids, permutation); diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 7d8b0f9f2f6..57da7e9ce43 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -49,7 +49,7 @@ void build_starting_indices(std::shared_ptr exec, Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; + Array permutation{exec, num_ranges}; // set sizes to 0 in case of empty parts components::fill_array(exec, sizes, num_parts, LocalIndexType{}); @@ -64,7 +64,7 @@ void build_starting_indices(std::shared_ptr exec, } range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); + permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, range_sizes, tmp_part_ids, permutation); diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index f64442e093f..1efa61d3bf1 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -61,7 +61,7 @@ void build_starting_indices(std::shared_ptr exec, Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; + Array permutation{exec, num_ranges}; // set sizes to 0 in case of empty parts components::fill_array(exec, sizes, num_parts, LocalIndexType{}); @@ -76,7 +76,7 @@ void build_starting_indices(std::shared_ptr exec, } range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); + permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, range_sizes, tmp_part_ids, permutation); diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index b17b312e9e0..25d5557b0da 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -49,7 +49,36 @@ namespace distributed { * with an associated part ID and local index (number of indices in this part * before `begin`). * Global indices are stored as 64 bit signed integers (int64), part-local - * indices use LocalIndexType, Part IDs use 32 bit signed integers (int) + * indices use LocalIndexType, Part IDs use 32 bit signed integers (int). 
+ *
+ * For example, consider the interval [0, 13) that is partitioned into the
+ * following ranges:
+ * ```
+ * [0,3), [3, 7), [7, 8), [8, 10), [10, 13).
+ * ```
+ * These ranges are distributed on three parts with:
+ * ```
+ * p_0 = [0, 3) + [7, 8) + [10, 13),
+ * p_1 = [3, 7),
+ * p_2 = [8, 10).
+ * ```
+ * The part ids can be queried from the @ref get_part_ids array, and the ranges
+ * are represented as offsets, accessed by @ref get_range_bounds, leading to the
+ * array:
+ * ```
+ * r = [0, 3, 7, 8, 10, 13]
+ * ```
+ * so that individual ranges are given by `[r[i], r[i + 1])`.
+ * Since a part may be associated with multiple ranges, it is possible to get
+ * the starting index of each range within its owning part, see @ref
+ * get_range_starting_indices. For the partition above that means
+ * ```
+ * starting_index[0] = 0,
+ * starting_index[1] = 0,
+ * starting_index[2] = 3, // second range of part 0
+ * starting_index[3] = 0,
+ * starting_index[4] = 4, // third range of part 0
+ * ```
 *
 * @tparam LocalIndexType The index type used for part-local indices.
 *                        To prevent overflows, no single part's size may
@@ -123,23 +152,16 @@ class Partition : public EnablePolymorphicObject<Partition<LocalIndexType>>,
         return part_ids_.get_const_data();
     }
 
-    /**
-     * Compute the range_starting_indices and part_sizes based on the current
-     * range_bounds and part_ids.
-     */
-    void compute_range_starting_indices();
-
     /**
      * Returns the part-local starting index for each range in this partition.
      *
      * Consider the partition on `[0, 10)` with
-     * > p_1 = [0-4), [7-10),
-     * > p_2 = [4-7).
+     * ```
+     * p_1 = [0-4) + [7-10),
+     * p_2 = [4-7).
+     * ```
      * Then `range_starting_indices[0] = 0`, `range_starting_indices[1] = 0`,
-     `range_starting_indices[2] = 5`.
-     *
-     * @note These values can only be used after compute_range_ranks() was
-     executed.
+     * `range_starting_indices[2] = 4`.
      */
     const local_index_type* get_range_starting_indices() const noexcept
    {
@@ -166,16 +188,16 @@ class Partition : public EnablePolymorphicObject<Partition<LocalIndexType>>,
     }
 
     /**
-     * Checks if each part is associated with at most one contiguous range.
+     * Checks if each part has no more than one contiguous range.
      */
-    bool is_connected();
+    bool has_connected_parts();
 
     /**
      * Checks if the ranges are ordered by their part index.
      *
      * Implies that the partition is connected.
      */
-    bool is_ordered();
+    bool has_ordered_parts();
 
 
     /**
@@ -240,6 +262,12 @@ class Partition : public EnablePolymorphicObject<Partition<LocalIndexType>>,
         part_ids_.fill(0);
     }
 
+    /**
+     * Compute the range_starting_indices and part_sizes based on the current
+     * range_bounds and part_ids.
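+     * For example, given range_bounds `[0, 3, 7, 8]` and part_ids `[0, 1, 0]`,
+     * this recomputes part_sizes `[4, 4]` and range starting indices
+     * `[0, 0, 3]`, since the second range of part 0 starts after the three
+     * indices of its first range.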
+ */ + void compute_range_starting_indices(); + comm_index_type num_parts_; comm_index_type num_empty_parts_; Array offsets_; diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index d2048af7eef..9b82622401e 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -56,6 +56,16 @@ using global_index_type = gko::distributed::global_index_type; using comm_index_type = gko::distributed::comm_index_type; +template +void assert_equal_data(const T* data, std::initializer_list reference_data) +{ + std::vector ref(std::move(reference_data)); + for (auto i = 0; i < ref.size(); ++i) { + EXPECT_EQ(data[i], ref[i]); + } +} + + template class Partition : public ::testing::Test { protected: @@ -83,40 +93,14 @@ TYPED_TEST(Partition, BuildsFromMapping) EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); EXPECT_EQ(partition->get_num_parts(), num_parts); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 2); - EXPECT_EQ(partition->get_range_bounds()[2], 3); - EXPECT_EQ(partition->get_range_bounds()[3], 5); - EXPECT_EQ(partition->get_range_bounds()[4], 6); - EXPECT_EQ(partition->get_range_bounds()[5], 8); - EXPECT_EQ(partition->get_range_bounds()[6], 9); - EXPECT_EQ(partition->get_range_bounds()[7], 10); - EXPECT_EQ(partition->get_range_bounds()[8], 13); - EXPECT_EQ(partition->get_range_bounds()[9], 15); - EXPECT_EQ(partition->get_range_bounds()[10], 16); - EXPECT_EQ(partition->get_part_ids()[0], 2); - EXPECT_EQ(partition->get_part_ids()[1], 0); - EXPECT_EQ(partition->get_part_ids()[2], 1); - EXPECT_EQ(partition->get_part_ids()[3], 2); - EXPECT_EQ(partition->get_part_ids()[4], 0); - EXPECT_EQ(partition->get_part_ids()[5], 1); - EXPECT_EQ(partition->get_part_ids()[6], 0); - EXPECT_EQ(partition->get_part_ids()[7], 1); - EXPECT_EQ(partition->get_part_ids()[8], 2); - EXPECT_EQ(partition->get_part_ids()[9], 0); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 2); - EXPECT_EQ(partition->get_range_starting_indices()[4], 1); - EXPECT_EQ(partition->get_range_starting_indices()[5], 2); - EXPECT_EQ(partition->get_range_starting_indices()[6], 3); - EXPECT_EQ(partition->get_range_starting_indices()[7], 3); - EXPECT_EQ(partition->get_range_starting_indices()[8], 3); - EXPECT_EQ(partition->get_range_starting_indices()[9], 4); - EXPECT_EQ(partition->get_part_sizes()[0], 5); - EXPECT_EQ(partition->get_part_sizes()[1], 6); - EXPECT_EQ(partition->get_part_sizes()[2], 5); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), + {0, 2, 3, 5, 6, 8, 9, 10, 13, 15, 16}); + assert_equal_data(partition->get_part_ids(), + {2, 0, 1, 2, 0, 1, 0, 1, 2, 0}); + assert_equal_data(partition->get_range_starting_indices(), + {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); + assert_equal_data(partition->get_part_sizes(), {5, 6, 5}); } @@ -136,42 +120,13 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) EXPECT_EQ(partition->get_num_ranges(), num_ranges); EXPECT_EQ(partition->get_num_parts(), num_parts); EXPECT_EQ(partition->get_num_empty_parts(), 2); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 2); - EXPECT_EQ(partition->get_range_bounds()[2], 3); - 
EXPECT_EQ(partition->get_range_bounds()[3], 5); - EXPECT_EQ(partition->get_range_bounds()[4], 6); - EXPECT_EQ(partition->get_range_bounds()[5], 8); - EXPECT_EQ(partition->get_range_bounds()[6], 9); - EXPECT_EQ(partition->get_range_bounds()[7], 10); - EXPECT_EQ(partition->get_range_bounds()[8], 13); - EXPECT_EQ(partition->get_range_bounds()[9], 15); - EXPECT_EQ(partition->get_range_bounds()[10], 16); - EXPECT_EQ(partition->get_part_ids()[0], 3); - EXPECT_EQ(partition->get_part_ids()[1], 0); - EXPECT_EQ(partition->get_part_ids()[2], 1); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 0); - EXPECT_EQ(partition->get_part_ids()[5], 1); - EXPECT_EQ(partition->get_part_ids()[6], 0); - EXPECT_EQ(partition->get_part_ids()[7], 1); - EXPECT_EQ(partition->get_part_ids()[8], 3); - EXPECT_EQ(partition->get_part_ids()[9], 0); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 2); - EXPECT_EQ(partition->get_range_starting_indices()[4], 1); - EXPECT_EQ(partition->get_range_starting_indices()[5], 2); - EXPECT_EQ(partition->get_range_starting_indices()[6], 3); - EXPECT_EQ(partition->get_range_starting_indices()[7], 3); - EXPECT_EQ(partition->get_range_starting_indices()[8], 3); - EXPECT_EQ(partition->get_range_starting_indices()[9], 4); - EXPECT_EQ(partition->get_part_sizes()[0], 5); - EXPECT_EQ(partition->get_part_sizes()[1], 6); - EXPECT_EQ(partition->get_part_sizes()[2], 0); - EXPECT_EQ(partition->get_part_sizes()[3], 5); - EXPECT_EQ(partition->get_part_sizes()[4], 0); + assert_equal_data(partition->get_range_bounds(), + {0, 2, 3, 5, 6, 8, 9, 10, 13, 15, 16}); + assert_equal_data(partition->get_part_ids(), + {3, 0, 1, 3, 0, 1, 0, 1, 3, 0}); + assert_equal_data(partition->get_range_starting_indices(), + {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); + assert_equal_data(partition->get_part_sizes(), {5, 6, 0, 5, 0}); } @@ -188,27 +143,11 @@ TYPED_TEST(Partition, BuildsFromRanges) ranges.get_data()[ranges.get_num_elems() - 1]); EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 5); - EXPECT_EQ(partition->get_range_bounds()[2], 5); - EXPECT_EQ(partition->get_range_bounds()[3], 7); - EXPECT_EQ(partition->get_range_bounds()[4], 9); - EXPECT_EQ(partition->get_range_bounds()[5], 10); - EXPECT_EQ(partition->get_part_ids()[0], 0); - EXPECT_EQ(partition->get_part_ids()[1], 1); - EXPECT_EQ(partition->get_part_ids()[2], 2); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 0); - EXPECT_EQ(partition->get_range_starting_indices()[4], 0); - EXPECT_EQ(partition->get_part_sizes()[0], 5); - EXPECT_EQ(partition->get_part_sizes()[1], 0); - EXPECT_EQ(partition->get_part_sizes()[2], 2); - EXPECT_EQ(partition->get_part_sizes()[3], 2); - EXPECT_EQ(partition->get_part_sizes()[4], 1); + EXPECT_EQ(partition->get_num_empty_parts(), 1); + assert_equal_data(partition->get_range_bounds(), {0, 5, 5, 7, 9, 10}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + 
assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {5, 0, 2, 2, 1}); } TYPED_TEST(Partition, BuildsFromGlobalSize) @@ -221,27 +160,11 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) EXPECT_EQ(partition->get_size(), 13); EXPECT_EQ(partition->get_num_ranges(), 5); EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 3); - EXPECT_EQ(partition->get_range_bounds()[2], 6); - EXPECT_EQ(partition->get_range_bounds()[3], 9); - EXPECT_EQ(partition->get_range_bounds()[4], 11); - EXPECT_EQ(partition->get_range_bounds()[5], 13); - EXPECT_EQ(partition->get_part_ids()[0], 0); - EXPECT_EQ(partition->get_part_ids()[1], 1); - EXPECT_EQ(partition->get_part_ids()[2], 2); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 0); - EXPECT_EQ(partition->get_range_starting_indices()[4], 0); - EXPECT_EQ(partition->get_part_sizes()[0], 3); - EXPECT_EQ(partition->get_part_sizes()[1], 3); - EXPECT_EQ(partition->get_part_sizes()[2], 3); - EXPECT_EQ(partition->get_part_sizes()[3], 2); - EXPECT_EQ(partition->get_part_sizes()[4], 2); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), {0, 3, 6, 9, 11, 13}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {3, 3, 3, 2, 2}); } @@ -255,27 +178,11 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 5); EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 0); - EXPECT_EQ(partition->get_range_bounds()[2], 0); - EXPECT_EQ(partition->get_range_bounds()[3], 0); - EXPECT_EQ(partition->get_range_bounds()[4], 0); - EXPECT_EQ(partition->get_range_bounds()[5], 0); - EXPECT_EQ(partition->get_part_ids()[0], 0); - EXPECT_EQ(partition->get_part_ids()[1], 1); - EXPECT_EQ(partition->get_part_ids()[2], 2); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 0); - EXPECT_EQ(partition->get_range_starting_indices()[4], 0); - EXPECT_EQ(partition->get_part_sizes()[0], 0); - EXPECT_EQ(partition->get_part_sizes()[1], 0); - EXPECT_EQ(partition->get_part_sizes()[2], 0); - EXPECT_EQ(partition->get_part_sizes()[3], 0); - EXPECT_EQ(partition->get_part_sizes()[4], 0); + EXPECT_EQ(partition->get_num_empty_parts(), 5); + assert_equal_data(partition->get_range_bounds(), {0, 0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {0, 0, 0, 0, 0}); } @@ -289,27 +196,11 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) EXPECT_EQ(partition->get_size(), 3); EXPECT_EQ(partition->get_num_ranges(), 5); 
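The `get_range_starting_indices` and `get_part_sizes` values asserted in these tests follow from a single pass over the ranges that accumulates each part's running size, which is also what the reference `build_starting_indices` kernel computes. A simplified standalone sketch of that rule (plain types, not the kernel's actual signature):

```cpp
#include <cstddef>
#include <vector>

// starts[i] is the number of indices that part_ids[i] already owns in
// earlier ranges; sizes ends up holding the total size of every part.
std::vector<int> range_starting_indices(const std::vector<long long>& bounds,
                                        const std::vector<int>& part_ids,
                                        int num_parts, std::vector<int>& sizes)
{
    sizes.assign(num_parts, 0);
    std::vector<int> starts(part_ids.size());
    for (std::size_t i = 0; i < part_ids.size(); ++i) {
        starts[i] = sizes[part_ids[i]];
        sizes[part_ids[i]] += static_cast<int>(bounds[i + 1] - bounds[i]);
    }
    return starts;
}
```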
EXPECT_EQ(partition->get_num_parts(), 5); - EXPECT_EQ(partition->get_range_bounds()[0], 0); - EXPECT_EQ(partition->get_range_bounds()[1], 1); - EXPECT_EQ(partition->get_range_bounds()[2], 2); - EXPECT_EQ(partition->get_range_bounds()[3], 3); - EXPECT_EQ(partition->get_range_bounds()[4], 3); - EXPECT_EQ(partition->get_range_bounds()[5], 3); - EXPECT_EQ(partition->get_part_ids()[0], 0); - EXPECT_EQ(partition->get_part_ids()[1], 1); - EXPECT_EQ(partition->get_part_ids()[2], 2); - EXPECT_EQ(partition->get_part_ids()[3], 3); - EXPECT_EQ(partition->get_part_ids()[4], 4); - EXPECT_EQ(partition->get_range_starting_indices()[0], 0); - EXPECT_EQ(partition->get_range_starting_indices()[1], 0); - EXPECT_EQ(partition->get_range_starting_indices()[2], 0); - EXPECT_EQ(partition->get_range_starting_indices()[3], 0); - EXPECT_EQ(partition->get_range_starting_indices()[4], 0); - EXPECT_EQ(partition->get_part_sizes()[0], 1); - EXPECT_EQ(partition->get_part_sizes()[1], 1); - EXPECT_EQ(partition->get_part_sizes()[2], 1); - EXPECT_EQ(partition->get_part_sizes()[3], 0); - EXPECT_EQ(partition->get_part_sizes()[4], 0); + EXPECT_EQ(partition->get_num_empty_parts(), 2); + assert_equal_data(partition->get_range_bounds(), {0, 1, 2, 3, 3, 3}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {1, 1, 1, 0, 0}); } @@ -321,7 +212,7 @@ TYPED_TEST(Partition, IsConnected) this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}}, 3)); - ASSERT_TRUE(part->is_connected()); + ASSERT_TRUE(part->has_connected_parts()); } @@ -333,7 +224,7 @@ TYPED_TEST(Partition, IsConnectedWithEmptyParts) this->ref, gko::Array{this->ref, {0, 0, 2, 2, 5}}, 6)); - ASSERT_TRUE(part->is_connected()); + ASSERT_TRUE(part->has_connected_parts()); } @@ -345,7 +236,7 @@ TYPED_TEST(Partition, IsConnectedUnordered) this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); - ASSERT_TRUE(part->is_connected()); + ASSERT_TRUE(part->has_connected_parts()); } @@ -357,7 +248,7 @@ TYPED_TEST(Partition, IsConnectedFail) this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}}, 3)); - ASSERT_FALSE(part->is_connected()); + ASSERT_FALSE(part->has_connected_parts()); } @@ -369,7 +260,7 @@ TYPED_TEST(Partition, IsOrdered) this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); - ASSERT_FALSE(part->is_ordered()); + ASSERT_FALSE(part->has_ordered_parts()); } @@ -381,7 +272,7 @@ TYPED_TEST(Partition, IsOrderedFail) this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3)); - ASSERT_TRUE(part->is_ordered()); + ASSERT_TRUE(part->has_ordered_parts()); } diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 011b989884f..9d17230356d 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -382,7 +382,7 @@ TYPED_TEST(Partition, IsOrderedTrue) gko::distributed::Partition::build_from_mapping( this->exec, mapping, num_parts); - ASSERT_TRUE(dpart->is_ordered()); + ASSERT_TRUE(dpart->has_ordered_parts()); } @@ -402,7 +402,7 @@ TYPED_TEST(Partition, IsOrderedFail) gko::distributed::Partition::build_from_mapping( this->exec, mapping, num_parts); - ASSERT_FALSE(dpart->is_ordered()); + ASSERT_FALSE(dpart->has_ordered_parts()); } @@ -422,7 +422,7 @@ TYPED_TEST(Partition, IsOrderedRandom) gko::distributed::Partition::build_from_mapping( this->exec, mapping, num_parts); - ASSERT_EQ(part->is_ordered(), dpart->is_ordered()); + ASSERT_EQ(part->has_ordered_parts(), 
dpart->has_ordered_parts()); } From 1720ca343e4a8f9d65d6c1dca3a090b193fce27b Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 28 Oct 2021 16:11:53 +0200 Subject: [PATCH 55/59] add template for global_index_type Co-authored-by: Pratik Nayak --- .../unified/distributed/partition_kernels.cpp | 31 ++- core/device_hooks/common_kernels.inc.cpp | 15 +- core/distributed/partition.cpp | 52 ++-- core/distributed/partition_kernels.hpp | 64 ++--- core/test/utils.hpp | 6 + cuda/distributed/partition_kernels.cu | 10 +- dpcpp/distributed/partition_kernels.dp.cpp | 10 +- hip/distributed/partition_kernels.hip.cpp | 10 +- include/ginkgo/core/base/types.hpp | 19 +- include/ginkgo/core/distributed/partition.hpp | 28 ++- omp/distributed/partition_kernels.cpp | 6 +- reference/distributed/partition_kernels.cpp | 39 +-- .../test/distributed/partition_kernels.cpp | 99 +++++--- test/distributed/partition_kernels.cpp | 226 ++++++++++-------- 14 files changed, 364 insertions(+), 251 deletions(-) diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 4318c0e7e19..2f60e3cc75a 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -62,9 +62,10 @@ void count_ranges(std::shared_ptr exec, } +template void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - global_index_type* range_bounds, + const Array& ranges, + GlobalIndexType* range_bounds, comm_index_type* part_ids) { run_kernel( @@ -79,10 +80,13 @@ void build_from_contiguous(std::shared_ptr exec, ranges.get_num_elems() - 1, ranges, range_bounds, part_ids); } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); + +template void build_from_mapping(std::shared_ptr exec, const Array& mapping, - global_index_type* range_bounds, + GlobalIndexType* range_bounds, comm_index_type* part_ids) { Array range_index_ranks{exec, mapping.get_num_elems() + 1}; @@ -114,11 +118,14 @@ void build_from_mapping(std::shared_ptr exec, range_index_ranks, range_bounds, part_ids); } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); + +template void build_ranges_from_global_size(std::shared_ptr exec, comm_index_type num_parts, - global_index_type global_size, - Array& ranges) + GlobalIndexType global_size, + Array& ranges) { const auto size_per_part = global_size / num_parts; const auto rest = global_size - (num_parts * size_per_part); @@ -131,11 +138,14 @@ void build_ranges_from_global_size(std::shared_ptr exec, components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems()); } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); + -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) +template +void has_ordered_parts( + std::shared_ptr exec, + const distributed::Partition* partition, + bool* result) { const auto part_ids = partition->get_part_ids(); const auto num_ranges = partition->get_num_ranges(); @@ -154,7 +164,8 @@ void is_ordered(std::shared_ptr exec, exec->template copy_val_to_host(result_uint32.get_const_data())); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 461f9a48067..bbd9a43161a 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ 
b/core/device_hooks/common_kernels.inc.cpp @@ -112,6 +112,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _macro(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(_macro) +#define GKO_STUB_LOCAL_GLOBAL_TYPE(_macro) \ + template \ + _macro(LocalIndexType, GlobalIndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(_macro) + #define GKO_STUB_NON_COMPLEX_VALUE_AND_INDEX_TYPE(_macro) \ template \ _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ @@ -215,11 +220,11 @@ namespace partition { GKO_STUB(GKO_PARTITION_COUNT_RANGES); -GKO_STUB(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); -GKO_STUB(GKO_PARTITION_BUILD_FROM_MAPPING); -GKO_STUB(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); +GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 1608fe21ecc..83347981605 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -48,15 +48,15 @@ GKO_REGISTER_OPERATION(build_ranges_from_global_size, partition::build_ranges_from_global_size); GKO_REGISTER_OPERATION(build_starting_indices, partition::build_starting_indices); -GKO_REGISTER_OPERATION(is_ordered, partition::is_ordered); +GKO_REGISTER_OPERATION(has_ordered_parts, partition::has_ordered_parts); } // namespace partition -template -std::unique_ptr> -Partition::build_from_mapping( +template +std::unique_ptr> +Partition::build_from_mapping( std::shared_ptr exec, const Array& mapping, comm_index_type num_parts) { @@ -72,11 +72,10 @@ Partition::build_from_mapping( } -template -std::unique_ptr> -Partition::build_from_contiguous( - std::shared_ptr exec, - const Array& ranges) +template +std::unique_ptr> +Partition::build_from_contiguous( + std::shared_ptr exec, const Array& ranges) { auto local_ranges = make_temporary_clone(exec, &ranges); auto result = Partition::create( @@ -90,21 +89,22 @@ Partition::build_from_contiguous( } -template -std::unique_ptr> -Partition::build_from_global_size_uniform( +template +std::unique_ptr> +Partition::build_from_global_size_uniform( std::shared_ptr exec, comm_index_type num_parts, - global_index_type global_size) + GlobalIndexType global_size) { - Array ranges(exec, num_parts + 1); + Array ranges(exec, num_parts + 1); exec->run(partition::make_build_ranges_from_global_size( num_parts, global_size, ranges)); - return Partition::build_from_contiguous(exec, ranges); + return Partition::build_from_contiguous(exec, ranges); } -template -void Partition::compute_range_starting_indices() +template +void Partition::compute_range_starting_indices() { auto exec = offsets_.get_executor(); exec->run(partition::make_build_starting_indices( @@ -114,29 +114,29 @@ void Partition::compute_range_starting_indices() } -template -bool Partition::has_connected_parts() +template +bool Partition::has_connected_parts() { return get_num_parts() - get_num_empty_parts() == get_num_ranges(); } -template -bool Partition::has_ordered_parts() +template +bool Partition::has_ordered_parts() { if (has_connected_parts()) { auto exec = 
this->get_executor(); - bool is_ordered; - exec->run(partition::make_is_ordered(this, &is_ordered)); - return is_ordered; + bool has_ordered_parts; + exec->run(partition::make_has_ordered_parts(this, &has_ordered_parts)); + return has_ordered_parts; } else { return false; } } -#define GKO_DECLARE_PARTITION(_type) class Partition<_type> -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION); +#define GKO_DECLARE_PARTITION(_local, _global) class Partition<_local, _global> +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(GKO_DECLARE_PARTITION); } // namespace distributed diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 6de6bfd723f..e3aa6c11a00 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -49,49 +49,55 @@ namespace kernels { const Array& mapping, \ size_type& num_ranges) -#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS \ +#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType) \ void build_from_contiguous(std::shared_ptr exec, \ - const Array& ranges, \ - global_index_type* range_bounds, \ + const Array& ranges, \ + GlobalIndexType* range_bounds, \ comm_index_type* part_ids) -#define GKO_PARTITION_BUILD_FROM_MAPPING \ +#define GKO_PARTITION_BUILD_FROM_MAPPING(GlobalIndexType) \ void build_from_mapping(std::shared_ptr exec, \ const Array& mapping, \ - global_index_type* range_bounds, \ + GlobalIndexType* range_bounds, \ comm_index_type* part_ids) -#define GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE \ - void build_ranges_from_global_size( \ - std::shared_ptr exec, \ - comm_index_type num_parts, global_index_type global_size, \ - Array& ranges) +#define GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE(GlobalIndexType) \ + void build_ranges_from_global_size( \ + std::shared_ptr exec, \ + comm_index_type num_parts, GlobalIndexType global_size, \ + Array& ranges) -#define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType) \ +#define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType, \ + GlobalIndexType) \ void build_starting_indices(std::shared_ptr exec, \ - const global_index_type* range_offsets, \ + const GlobalIndexType* range_offsets, \ const int* range_parts, size_type num_ranges, \ comm_index_type num_parts, \ comm_index_type& num_empty_parts, \ LocalIndexType* ranks, LocalIndexType* sizes) -#define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) \ - void is_ordered(std::shared_ptr exec, \ - const distributed::Partition* partition, \ - bool* result) - - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - using global_index_type = distributed::global_index_type; \ - using comm_index_type = distributed::comm_index_type; \ - GKO_PARTITION_COUNT_RANGES; \ - GKO_PARTITION_BUILD_FROM_CONTIGUOUS; \ - GKO_PARTITION_BUILD_FROM_MAPPING; \ - GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE; \ - template \ - GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType); \ - template \ - GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType) +#define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) \ + void has_ordered_parts( \ + std::shared_ptr exec, \ + const distributed::Partition* \ + partition, \ + bool* result) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using comm_index_type = distributed::comm_index_type; \ + GKO_PARTITION_COUNT_RANGES; \ + template \ + GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType); \ + template \ + GKO_PARTITION_BUILD_FROM_MAPPING(GlobalIndexType); \ + template \ + GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE(GlobalIndexType); \ + template \ + 
GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType, \ + GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition, GKO_DECLARE_ALL_AS_TEMPLATES); diff --git a/core/test/utils.hpp b/core/test/utils.hpp index 2bcae20d065..a848d51765a 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -85,6 +85,12 @@ using RealValueTypes = using IndexTypes = ::testing::Types; + +using LocalGlobalIndexTypes = + ::testing::Types, std::tuple, + std::tuple>; + + using PODTypes = #if GINKGO_DPCPP_SINGLE_MODE ::testing::Types; diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index 8127ba897ce..21a82169f4f 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -51,9 +51,9 @@ namespace cuda { namespace partition { -template +template void build_starting_indices(std::shared_ptr exec, - const global_index_type* range_offsets, + const GlobalIndexType* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, int& num_empty_parts, LocalIndexType* ranks, LocalIndexType* sizes) @@ -61,7 +61,7 @@ void build_starting_indices(std::shared_ptr exec, Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; + Array permutation{exec, num_ranges}; // set sizes to 0 in case of empty parts components::fill_array(exec, sizes, num_parts, LocalIndexType{}); @@ -76,7 +76,7 @@ void build_starting_indices(std::shared_ptr exec, } range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); + permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, range_sizes, tmp_part_ids, permutation); @@ -115,7 +115,7 @@ void build_starting_indices(std::shared_ptr exec, thrust::count(thrust::device, sizes, sizes + num_parts, 0); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 57da7e9ce43..495ef6325d9 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -39,9 +39,9 @@ namespace dpcpp { namespace partition { -template +template void build_starting_indices(std::shared_ptr exec, - const global_index_type* range_offsets, + const GlobalIndexType* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, int& num_empty_parts, LocalIndexType* ranks, LocalIndexType* sizes) @@ -49,7 +49,7 @@ void build_starting_indices(std::shared_ptr exec, Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; + Array permutation{exec, num_ranges}; // set sizes to 0 in case of empty parts components::fill_array(exec, sizes, num_parts, LocalIndexType{}); @@ -64,7 +64,7 @@ void build_starting_indices(std::shared_ptr exec, } range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); + permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, range_sizes, tmp_part_ids, permutation); @@ -98,7 +98,7 @@ void build_starting_indices(std::shared_ptr exec, num_ranges, range_sizes, tmp_part_ids, permutation, ranks, 
         sizes);
 }
 
-GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
     GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
 
 
diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp
index 1efa61d3bf1..a2bd3a43406 100644
--- a/hip/distributed/partition_kernels.hip.cpp
+++ b/hip/distributed/partition_kernels.hip.cpp
@@ -51,9 +51,9 @@ namespace hip {
 namespace partition {
 
 
-template <typename LocalIndexType>
+template <typename LocalIndexType, typename GlobalIndexType>
 void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
-                            const global_index_type* range_offsets,
+                            const GlobalIndexType* range_offsets,
                             const int* range_parts, size_type num_ranges,
                             int num_parts, int& num_empty_parts,
                             LocalIndexType* ranks, LocalIndexType* sizes)
 {
     Array<LocalIndexType> range_sizes{exec, num_ranges};
     // num_parts sentinel at the end
     Array<comm_index_type> tmp_part_ids{exec, num_ranges + 1};
-    Array<global_index_type> permutation{exec, num_ranges};
+    Array<GlobalIndexType> permutation{exec, num_ranges};
 
     // set sizes to 0 in case of empty parts
     components::fill_array(exec, sizes, num_parts, LocalIndexType{});
@@ -76,7 +76,7 @@
             }
             range_sizes[i] = range_offsets[i + 1] - range_offsets[i];
             tmp_part_ids[i] = range_parts[i];
-            permutation[i] = static_cast<global_index_type>(i);
+            permutation[i] = static_cast<GlobalIndexType>(i);
         },
         num_ranges, num_ranges, num_parts, range_offsets, range_parts,
         range_sizes, tmp_part_ids, permutation);
@@ -115,7 +115,7 @@
         thrust::count(thrust::device, sizes, sizes + num_parts, 0);
 }
 
-GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
+GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
     GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
 
 
diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp
index 81a69ee8a0f..20874f05ee5 100644
--- a/include/ginkgo/core/base/types.hpp
+++ b/include/ginkgo/core/base/types.hpp
@@ -703,17 +703,26 @@ namespace distributed {
 
 
 /**
- * Index type for global indices in a distributed system
+ * Index type for enumerating processors
+ *
+ * Conforms to the MPI C interface of e.g. MPI rank or size
  */
-using global_index_type = int64;
+using comm_index_type = int;
 
 
 /**
- * Index type for enumerating processors
+ * Instantiates a template for each valid combination of local and global index
+ * type
  *
- * Conforms to the MPI C interface of e.g. MPI rank or size
+ * @param _macro A macro which expands the template instantiation
+ *               (not including the leading `template` specifier).
+ *               Should take two arguments, where the first is replaced by the
+ *               local index type and the second by the global index type.
  */
-using comm_index_type = int;
+#define GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(_macro) \
+    template _macro(int32, int32);                               \
+    template _macro(int32, int64);                               \
+    template _macro(int64, int64);
 
 
 } // namespace distributed
diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp
index 25d5557b0da..9d603161ffe 100644
--- a/include/ginkgo/core/distributed/partition.hpp
+++ b/include/ginkgo/core/distributed/partition.hpp
@@ -83,23 +83,29 @@ namespace distributed {
  * @tparam LocalIndexType The index type used for part-local indices.
  *                        To prevent overflows, no single part's size may
  *                        exceed this index type's maximum value.
+ * @tparam GlobalIndexType The index type used for the global indices. Needs
+ *                         to be at least as large a type as LocalIndexType.
*/ -template -class Partition : public EnablePolymorphicObject>, - public EnablePolymorphicAssignment>, - public EnableCreateMethod> { - friend class EnableCreateMethod>; - friend class EnablePolymorphicObject>; - static_assert(sizeof(global_index_type) >= sizeof(LocalIndexType), - "global_index_type must be at least as large as " +template +class Partition + : public EnablePolymorphicObject< + Partition>, + public EnablePolymorphicAssignment< + Partition>, + public EnableCreateMethod> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + static_assert(sizeof(GlobalIndexType) >= sizeof(LocalIndexType), + "GlobalIndexType must be at least as large as " "LocalIndexType"); public: - using EnableCreateMethod>::create; - using EnablePolymorphicAssignment>::convert_to; - using EnablePolymorphicAssignment>::move_to; + using EnableCreateMethod::create; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; using local_index_type = LocalIndexType; + using global_index_type = GlobalIndexType; /** * Returns the total number of elements represented by this partition. diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index a393f80cb8d..171e7ffc95f 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -45,9 +45,9 @@ namespace omp { namespace partition { -template +template void build_starting_indices(std::shared_ptr exec, - const global_index_type* range_offsets, + const GlobalIndexType* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, int& num_empty_parts, LocalIndexType* ranks, LocalIndexType* sizes) @@ -92,7 +92,7 @@ void build_starting_indices(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index 9b39ad6f974..c2f411f3c22 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -52,9 +52,10 @@ void count_ranges(std::shared_ptr exec, } +template void build_from_contiguous(std::shared_ptr exec, - const Array& ranges, - global_index_type* range_bounds, + const Array& ranges, + GlobalIndexType* range_bounds, comm_index_type* part_ids) { range_bounds[0] = 0; @@ -65,10 +66,13 @@ void build_from_contiguous(std::shared_ptr exec, } } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); + +template void build_from_mapping(std::shared_ptr exec, const Array& mapping, - global_index_type* range_bounds, + GlobalIndexType* range_bounds, comm_index_type* part_ids) { size_type range_idx{}; @@ -83,14 +87,17 @@ void build_from_mapping(std::shared_ptr exec, } } range_bounds[range_idx] = - static_cast(mapping.get_num_elems()); + static_cast(mapping.get_num_elems()); } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); + +template void build_ranges_from_global_size(std::shared_ptr exec, comm_index_type num_parts, - global_index_type global_size, - Array& ranges) + GlobalIndexType global_size, + Array& ranges) { const auto size_per_part = global_size / num_parts; const auto rest = global_size - (num_parts * size_per_part); @@ -104,10 +111,12 @@ void build_ranges_from_global_size(std::shared_ptr exec, } } +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); + -template +template void 
build_starting_indices(std::shared_ptr exec, - const global_index_type* range_offsets, + const GlobalIndexType* range_offsets, const int* range_parts, size_type num_ranges, int num_parts, int& num_empty_parts, LocalIndexType* ranks, LocalIndexType* sizes) @@ -124,13 +133,14 @@ void build_starting_indices(std::shared_ptr exec, num_empty_parts = std::count(sizes, sizes + num_parts, 0); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); -template -void is_ordered(std::shared_ptr exec, - const distributed::Partition* partition, - bool* result) +template +void has_ordered_parts( + std::shared_ptr exec, + const distributed::Partition* partition, + bool* result) { *result = true; auto part_ids = partition->get_part_ids(); @@ -143,7 +153,8 @@ void is_ordered(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition } // namespace reference diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 9b82622401e..909dcf914a4 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -52,7 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -using global_index_type = gko::distributed::global_index_type; using comm_index_type = gko::distributed::comm_index_type; @@ -66,29 +65,34 @@ void assert_equal_data(const T* data, std::initializer_list reference_data) } -template +template class Partition : public ::testing::Test { protected: - using local_index_type = LocalIndexType; + using local_index_type = + typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; Partition() : ref(gko::ReferenceExecutor::create()) {} std::shared_ptr ref; }; -TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); +TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); TYPED_TEST(Partition, BuildsFromMapping) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array mapping{ this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; comm_index_type num_parts = 3; gko::size_type num_ranges = 10; - auto partition = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); @@ -107,14 +111,16 @@ TYPED_TEST(Partition, BuildsFromMapping) TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array mapping{ this->ref, {3, 3, 0, 1, 1, 3, 0, 0, 1, 0, 1, 1, 1, 3, 3, 0}}; comm_index_type num_parts = 5; gko::size_type num_ranges = 10; - auto partition = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); EXPECT_EQ(partition->get_size(), 
mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); @@ -133,11 +139,12 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) TYPED_TEST(Partition, BuildsFromRanges) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; - auto partition = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); EXPECT_EQ(partition->get_size(), ranges.get_data()[ranges.get_num_elems() - 1]); @@ -153,9 +160,11 @@ TYPED_TEST(Partition, BuildsFromRanges) TYPED_TEST(Partition, BuildsFromGlobalSize) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; - auto partition = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, 5, 13); + auto partition = + gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, 5, 13); EXPECT_EQ(partition->get_size(), 13); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -171,9 +180,11 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; - auto partition = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, 5, 0); + auto partition = + gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, 5, 0); EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -189,9 +200,11 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; - auto partition = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, 5, 3); + auto partition = + gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, 5, 3); EXPECT_EQ(partition->get_size(), 3); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -207,10 +220,12 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) TYPED_TEST(Partition, IsConnected) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}}, - 3)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {0, 0, 1, 1, 2}}, 3)); ASSERT_TRUE(part->has_connected_parts()); } @@ -219,10 +234,12 @@ TYPED_TEST(Partition, IsConnected) TYPED_TEST(Partition, IsConnectedWithEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 0, 2, 2, 5}}, - 6)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {0, 0, 2, 2, 5}}, 6)); ASSERT_TRUE(part->has_connected_parts()); } @@ -231,10 +248,12 @@ TYPED_TEST(Partition, IsConnectedWithEmptyParts) TYPED_TEST(Partition, IsConnectedUnordered) { 
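    // parts appear out of order here (part 1 before part 0), but each part
    // still owns a single contiguous range, so the partition is connected
    // even though it is not ordered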
using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, - 3)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); ASSERT_TRUE(part->has_connected_parts()); } @@ -243,10 +262,12 @@ TYPED_TEST(Partition, IsConnectedUnordered) TYPED_TEST(Partition, IsConnectedFail) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}}, - 3)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {0, 1, 2, 0, 1}}, 3)); ASSERT_FALSE(part->has_connected_parts()); } @@ -255,10 +276,12 @@ TYPED_TEST(Partition, IsConnectedFail) TYPED_TEST(Partition, IsOrdered) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, - 3)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); ASSERT_FALSE(part->has_ordered_parts()); } @@ -267,10 +290,12 @@ TYPED_TEST(Partition, IsOrdered) TYPED_TEST(Partition, IsOrderedFail) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; auto part = gko::share( - gko::distributed::Partition::build_from_mapping( - this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}}, - 3)); + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3)); ASSERT_TRUE(part->has_ordered_parts()); } diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 9d17230356d..7f0f104fb50 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -53,14 +53,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
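
Both this test file and the reference one above switch the typed-test parameter from a single index type to a (local, global) index pair packed into a `std::tuple`, which the fixture below unpacks via `std::tuple_element`. A minimal, self-contained sketch of that idiom; `index_pair` and `fixture` are stand-ins for one entry of the assumed `gko::test::LocalGlobalIndexTypes` list and the real test fixture:

```
#include <cstdint>
#include <tuple>
#include <type_traits>

// stand-in for one entry of the assumed gko::test::LocalGlobalIndexTypes list
using index_pair = std::tuple<std::int32_t, std::int64_t>;

// mirrors the fixtures: element 0 is the local, element 1 the global type
template <typename LocalGlobalIndexType>
struct fixture {
    using local_index_type =
        typename std::tuple_element<0, LocalGlobalIndexType>::type;
    using global_index_type =
        typename std::tuple_element<1, LocalGlobalIndexType>::type;
};

static_assert(
    std::is_same<fixture<index_pair>::local_index_type, std::int32_t>::value,
    "local index type is the first tuple element");
static_assert(
    std::is_same<fixture<index_pair>::global_index_type, std::int64_t>::value,
    "global index type is the second tuple element");

int main() {}
```
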
namespace { -using global_index_type = gko::distributed::global_index_type; using comm_index_type = gko::distributed::comm_index_type; -template +template class Partition : public ::testing::Test { protected: - using local_index_type = LocalIndexType; + using local_index_type = + typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; Partition() : rand_engine(96457) {} void SetUp() @@ -77,8 +79,10 @@ class Partition : public ::testing::Test { } void assert_equal( - std::unique_ptr>& part, - std::unique_ptr>& dpart) + std::unique_ptr>& part, + std::unique_ptr>& dpart) { ASSERT_EQ(part->get_size(), dpart->get_size()); ASSERT_EQ(part->get_num_ranges(), dpart->get_num_ranges()); @@ -120,12 +124,13 @@ class Partition : public ::testing::Test { std::shared_ptr exec; }; -TYPED_TEST_SUITE(Partition, gko::test::IndexTypes); +TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); TYPED_TEST(Partition, BuildsFromMapping) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; gko::Array mapping{this->ref, 10000}; @@ -134,12 +139,14 @@ TYPED_TEST(Partition, BuildsFromMapping) } gko::Array dmapping{this->exec, mapping}; - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, dmapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + dmapping, + num_parts); this->assert_equal(part, dpart); } @@ -148,6 +155,7 @@ TYPED_TEST(Partition, BuildsFromMapping) TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; // skip part 0 std::uniform_int_distribution part_dist{1, num_parts - 1}; @@ -157,12 +165,14 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) } gko::Array dmapping{this->exec, mapping}; - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, dmapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + dmapping, + num_parts); this->assert_equal(part, dpart); } @@ -171,6 +181,7 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; // return only part 1 std::uniform_int_distribution part_dist{1, 1}; @@ -180,12 +191,14 @@ TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) } gko::Array dmapping{this->exec, mapping}; - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - 
gko::distributed::Partition::build_from_mapping( - this->exec, dmapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + dmapping, + num_parts); this->assert_equal(part, dpart); } @@ -194,16 +207,19 @@ TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; gko::Array mapping{this->ref, 0}; gko::Array dmapping{this->exec, 0}; - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, dmapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + dmapping, + num_parts); this->assert_equal(part, dpart); } @@ -212,17 +228,20 @@ TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) TYPED_TEST(Partition, BuildsFromMappingWithOnePart) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 1; gko::Array mapping{this->ref, 10000}; mapping.fill(0); gko::Array dmapping{this->exec, mapping}; - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, dmapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + dmapping, + num_parts); this->assert_equal(part, dpart); } @@ -231,16 +250,17 @@ TYPED_TEST(Partition, BuildsFromMappingWithOnePart) TYPED_TEST(Partition, BuildsFromContiguous) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{this->ref, {0, 1234, 3134, 4578, 16435, 60000}}; gko::Array dranges{this->exec, ranges}; - auto part = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - auto dpart = - gko::distributed::Partition::build_from_contiguous( - this->exec, dranges); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); this->assert_equal(part, dpart); } @@ -249,16 +269,17 @@ TYPED_TEST(Partition, BuildsFromContiguous) TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{ this->ref, {0, 1234, 3134, 3134, 4578, 16435, 16435, 60000}}; gko::Array dranges{this->exec, ranges}; - auto part = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - auto dpart = - 
gko::distributed::Partition::build_from_contiguous( - this->exec, dranges); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); this->assert_equal(part, dpart); } @@ -267,16 +288,17 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithSomeMostlyEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{ this->ref, {0, 0, 3134, 4578, 4578, 4578, 4578, 4578}}; gko::Array dranges{this->exec, ranges}; - auto part = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - auto dpart = - gko::distributed::Partition::build_from_contiguous( - this->exec, dranges); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); this->assert_equal(part, dpart); } @@ -285,15 +307,16 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSomeMostlyEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{this->ref, {0, 0, 0, 0, 0, 0, 0}}; gko::Array dranges{this->exec, ranges}; - auto part = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - auto dpart = - gko::distributed::Partition::build_from_contiguous( - this->exec, dranges); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); this->assert_equal(part, dpart); } @@ -302,15 +325,16 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; gko::Array ranges{this->ref, {0, 0}}; gko::Array dranges{this->exec, ranges}; - auto part = - gko::distributed::Partition::build_from_contiguous( - this->ref, ranges); - auto dpart = - gko::distributed::Partition::build_from_contiguous( - this->exec, dranges); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); this->assert_equal(part, dpart); } @@ -319,15 +343,16 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) TYPED_TEST(Partition, BuildsFromGlobalSize) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; const int num_parts = 7; const global_index_type global_size = 708; - auto part = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, num_parts, - global_size); - auto dpart = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->exec, num_parts, - global_size); + auto part = 
+ gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition:: + build_from_global_size_uniform(this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -336,15 +361,16 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; const int num_parts = 7; const global_index_type global_size = 0; - auto part = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, num_parts, - global_size); - auto dpart = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->exec, num_parts, - global_size); + auto part = + gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition:: + build_from_global_size_uniform(this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -353,15 +379,16 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; const int num_parts = 77; const global_index_type global_size = 13; - auto part = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->ref, num_parts, - global_size); - auto dpart = gko::distributed::Partition< - local_index_type>::build_from_global_size_uniform(this->exec, num_parts, - global_size); + auto part = + gko::distributed::Partition:: + build_from_global_size_uniform(this->ref, num_parts, global_size); + auto dpart = + gko::distributed::Partition:: + build_from_global_size_uniform(this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -370,6 +397,7 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) TYPED_TEST(Partition, IsOrderedTrue) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; gko::size_type size_per_part = 1000; gko::size_type global_size = num_parts * size_per_part; @@ -378,9 +406,10 @@ TYPED_TEST(Partition, IsOrderedTrue) std::fill(mapping.get_data() + i * size_per_part, mapping.get_data() + (i + 1) * size_per_part, i); } - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, mapping, num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); ASSERT_TRUE(dpart->has_ordered_parts()); } @@ -389,6 +418,7 @@ TYPED_TEST(Partition, IsOrderedTrue) TYPED_TEST(Partition, IsOrderedFail) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; gko::size_type size_per_part = 1000; gko::size_type global_size = num_parts * size_per_part; @@ -398,9 +428,10 @@ TYPED_TEST(Partition, IsOrderedFail) mapping.get_data() + (i + 1) * size_per_part, num_parts - 1 - i); } - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, mapping, num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); 
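    // the blocks above fill parts in reverse order (num_parts - 1 down to 0),
    // so part IDs decrease across ranges and the parts cannot be ordered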
ASSERT_FALSE(dpart->has_ordered_parts()); } @@ -409,18 +440,21 @@ TYPED_TEST(Partition, IsOrderedFail) TYPED_TEST(Partition, IsOrderedRandom) { using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; gko::Array mapping{this->ref, 10000}; for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { mapping.get_data()[i] = part_dist(this->rand_engine); } - auto part = - gko::distributed::Partition::build_from_mapping( - this->ref, mapping, num_parts); - auto dpart = - gko::distributed::Partition::build_from_mapping( - this->exec, mapping, num_parts); + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); ASSERT_EQ(part->has_ordered_parts(), dpart->has_ordered_parts()); } From 8d550fbf554394ca91fcf940960a70689382eee5 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 24 Nov 2021 14:32:03 +0100 Subject: [PATCH 56/59] review updates - renaming - unifying kernels for computing local starting indices - documentation Co-authored-by: Aditya Kashi Co-authored-by: Yu-Hsiang Tsai --- .../distributed/partition_kernels.hpp.inc | 79 ++++++++++++++++ .../unified/distributed/partition_kernels.cpp | 36 +++++--- .../distributed/partition_kernels.hpp.inc | 92 +++++++++++++++++++ core/distributed/partition.cpp | 14 +-- cuda/distributed/partition_kernels.cu | 70 +------------- dpcpp/distributed/partition_kernels.dp.cpp | 60 +----------- hip/distributed/partition_kernels.hip.cpp | 70 +------------- include/ginkgo/core/base/types.hpp | 4 +- include/ginkgo/core/distributed/partition.hpp | 77 +++++++++++----- omp/distributed/partition_kernels.cpp | 6 +- .../test/distributed/partition_kernels.cpp | 43 ++++++++- test/distributed/partition_kernels.cpp | 45 +++++---- 12 files changed, 338 insertions(+), 258 deletions(-) create mode 100644 common/cuda_hip/distributed/partition_kernels.hpp.inc create mode 100644 common/unified/distributed/partition_kernels.hpp.inc diff --git a/common/cuda_hip/distributed/partition_kernels.hpp.inc b/common/cuda_hip/distributed/partition_kernels.hpp.inc new file mode 100644 index 00000000000..49e08dadfc5 --- /dev/null +++ b/common/cuda_hip/distributed/partition_kernels.hpp.inc @@ -0,0 +1,79 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include "common/unified/distributed/partition_kernels.hpp.inc" + + +template +void build_starting_indices(std::shared_ptr exec, + const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, + size_type num_ranges, comm_index_type num_parts, + comm_index_type& num_empty_parts, + LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set part_sizes to 0 in case of empty parts + components::fill_array(exec, part_sizes, num_parts, LocalIndexType{}); + + + kernel::setup_sizes_ids_permutation(exec, num_ranges, num_parts, + range_offsets, range_parts, range_sizes, + tmp_part_ids, permutation); + + auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); + auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); + auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); + auto value_it = thrust::make_zip_iterator( + thrust::make_tuple(range_sizes_ptr, permutation_ptr)); + // group range_sizes by part ID + thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, value_it); + // compute inclusive prefix sum for each part + thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, range_sizes_ptr, + range_sizes_ptr); + // write back the results + kernel::compute_part_sizes_and_starting_indices( + exec, num_ranges, range_sizes, tmp_part_ids, permutation, + starting_indices, part_sizes); + num_empty_parts = + thrust::count(thrust::device, part_sizes, part_sizes + num_parts, 0); +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index 2f60e3cc75a..08d79e17f43 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -44,6 +44,8 @@ namespace GKO_DEVICE_NAMESPACE { namespace partition { +using distributed::comm_index_type; + void count_ranges(std::shared_ptr exec, const Array& mapping, size_type& num_ranges) { @@ -89,25 +91,29 @@ void build_from_mapping(std::shared_ptr exec, GlobalIndexType* range_bounds, comm_index_type* part_ids) { - Array range_index_ranks{exec, mapping.get_num_elems() + 1}; + Array range_starting_index{exec, mapping.get_num_elems() + 1}; run_kernel( exec, - [] GKO_KERNEL(auto i, auto mapping, auto output) { - const auto prev_part = i > 0 ? 
mapping[i - 1] : comm_index_type{-1}; + [] GKO_KERNEL(auto i, auto mapping, auto range_starting_index) { + const auto prev_part = + i > 0 ? mapping[i - 1] : invalid_index(); const auto cur_part = mapping[i]; - output[i] = cur_part != prev_part ? 1 : 0; + range_starting_index[i] = cur_part != prev_part ? 1 : 0; }, - mapping.get_num_elems(), mapping, range_index_ranks); - components::prefix_sum(exec, range_index_ranks.get_data(), + mapping.get_num_elems(), mapping, range_starting_index); + components::prefix_sum(exec, range_starting_index.get_data(), mapping.get_num_elems() + 1); run_kernel( exec, - [] GKO_KERNEL(auto i, auto size, auto mapping, auto prefix_sum, - auto ranges, auto range_parts) { - const auto prev_part = i > 0 ? mapping[i - 1] : comm_index_type{-1}; - const auto cur_part = i < size ? mapping[i] : comm_index_type{-1}; + [] GKO_KERNEL(auto i, auto size, auto mapping, + auto range_starting_index, auto ranges, + auto range_parts) { + const auto prev_part = + i > 0 ? mapping[i - 1] : invalid_index(); + const auto cur_part = + i < size ? mapping[i] : invalid_index(); if (cur_part != prev_part) { - auto out_idx = prefix_sum[i]; + auto out_idx = range_starting_index[i]; ranges[out_idx] = i; if (i < size) { range_parts[out_idx] = cur_part; @@ -115,7 +121,7 @@ void build_from_mapping(std::shared_ptr exec, } }, mapping.get_num_elems() + 1, mapping.get_num_elems(), mapping, - range_index_ranks, range_bounds, part_ids); + range_starting_index, range_bounds, part_ids); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); @@ -157,11 +163,13 @@ void has_ordered_parts( [] GKO_KERNEL(auto i, const auto part_ids) { return static_cast(part_ids[i] < part_ids[i + 1]); }, - [] GKO_KERNEL(const auto a, const auto b) { return a && b; }, + [] GKO_KERNEL(const auto a, const auto b) { + return static_cast(a && b); + }, [] GKO_KERNEL(const auto a) { return a; }, uint32(1), result_uint32.get_data(), num_ranges - 1, part_ids); *result = static_cast( - exec->template copy_val_to_host(result_uint32.get_const_data())); + exec->copy_val_to_host(result_uint32.get_const_data())); } GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/common/unified/distributed/partition_kernels.hpp.inc b/common/unified/distributed/partition_kernels.hpp.inc new file mode 100644 index 00000000000..a702334945f --- /dev/null +++ b/common/unified/distributed/partition_kernels.hpp.inc @@ -0,0 +1,92 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +namespace kernel { + + +template +void setup_sizes_ids_permutation( + std::shared_ptr exec, size_type num_ranges, + comm_index_type num_parts, const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, Array& range_sizes, + Array& part_ids, Array& permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes.get_data(), part_ids.get_data(), permutation.get_data()); +} + + +template +void compute_part_sizes_and_starting_indices( + std::shared_ptr exec, size_type num_ranges, + const Array& range_sizes, + const Array& part_ids, + const Array& permutation, LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_starting_indices, + auto grouped_part_ids, auto orig_idxs, + auto starting_indices, auto part_sizes) { + auto prev_part = i > 0 ? grouped_part_ids[i - 1] + : invalid_index(); + auto cur_part = grouped_part_ids[i]; + auto next_part = + grouped_part_ids[i + 1]; // last element has to be num_parts + if (cur_part != next_part) { + part_sizes[cur_part] = grouped_starting_indices[i]; + } + // write result shifted by one entry to get exclusive prefix sum + starting_indices[orig_idxs[i]] = + prev_part == cur_part ? 
grouped_starting_indices[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(), + permutation.get_const_data(), starting_indices, part_sizes); +} + + +} // namespace kernel diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 83347981605..7de36e380db 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -67,7 +67,7 @@ Partition::build_from_mapping( exec->run(partition::make_build_from_mapping(*local_mapping.get(), result->offsets_.get_data(), result->part_ids_.get_data())); - result->compute_range_starting_indices(); + result->finalize_construction(); return result; } @@ -84,7 +84,7 @@ Partition::build_from_contiguous( exec->run(partition::make_build_from_contiguous( *local_ranges.get(), result->offsets_.get_data(), result->part_ids_.get_data())); - result->compute_range_starting_indices(); + result->finalize_construction(); return result; } @@ -103,28 +103,30 @@ Partition::build_from_global_size_uniform( template -void Partition::compute_range_starting_indices() +void Partition::finalize_construction() { auto exec = offsets_.get_executor(); exec->run(partition::make_build_starting_indices( offsets_.get_const_data(), part_ids_.get_const_data(), get_num_ranges(), get_num_parts(), num_empty_parts_, starting_indices_.get_data(), part_sizes_.get_data())); + size_ = offsets_.get_executor()->copy_val_to_host( + offsets_.get_const_data() + get_num_ranges()); } template bool Partition::has_connected_parts() { - return get_num_parts() - get_num_empty_parts() == get_num_ranges(); + return this->get_num_parts() - this->get_num_empty_parts() == + this->get_num_ranges(); } template bool Partition::has_ordered_parts() { - if (has_connected_parts()) { + if (this->has_connected_parts()) { auto exec = this->get_executor(); bool has_ordered_parts; exec->run(partition::make_has_ordered_parts(this, &has_ordered_parts)); diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu index 21a82169f4f..2ab5bab2ebe 100644 --- a/cuda/distributed/partition_kernels.cu +++ b/cuda/distributed/partition_kernels.cu @@ -41,8 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common/unified/base/kernel_launch.hpp" -#include "core/components/fill_array.hpp" -#include "core/components/prefix_sum.hpp" +#include "core/components/fill_array_kernels.hpp" namespace gko { @@ -51,72 +50,7 @@ namespace cuda { namespace partition { -template -void build_starting_indices(std::shared_ptr exec, - const GlobalIndexType* range_offsets, - const int* range_parts, size_type num_ranges, - int num_parts, int& num_empty_parts, - LocalIndexType* ranks, LocalIndexType* sizes) -{ - Array range_sizes{exec, num_ranges}; - // num_parts sentinel at the end - Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; - // set sizes to 0 in case of empty parts - components::fill_array(exec, sizes, num_parts, LocalIndexType{}); - - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, - auto range_offsets, auto range_parts, auto range_sizes, - auto tmp_part_ids, auto permutation) { - if (i == 0) { - // set sentinel value at the end - tmp_part_ids[num_ranges] = num_parts; - } - range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; - tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); - }, - num_ranges, num_ranges, num_parts, range_offsets, range_parts, - range_sizes, tmp_part_ids, permutation); - - auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); - auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); - auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); - auto value_it = thrust::make_zip_iterator( - thrust::make_tuple(range_sizes_ptr, permutation_ptr)); - // group sizes by part ID - thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, value_it); - // compute inclusive prefix sum for each part - thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, range_sizes_ptr, - range_sizes_ptr); - // write back the results - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, - auto orig_idxs, auto ranks, auto sizes) { - auto prev_part = - i > 0 ? grouped_part_ids[i - 1] : comm_index_type{-1}; - auto cur_part = grouped_part_ids[i]; - auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel - if (cur_part != next_part) { - sizes[cur_part] = grouped_range_ranks[i]; - } - // write result shifted by one entry to get exclusive prefix sum - ranks[orig_idxs[i]] = prev_part == cur_part - ? 
grouped_range_ranks[i - 1] - : LocalIndexType{}; - }, - num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); - num_empty_parts = - thrust::count(thrust::device, sizes, sizes + num_parts, 0); -} - -GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +#include "common/cuda_hip/distributed/partition_kernels.hpp.inc" } // namespace partition diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 495ef6325d9..65f491c10df 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -42,61 +42,11 @@ namespace partition { template void build_starting_indices(std::shared_ptr exec, const GlobalIndexType* range_offsets, - const int* range_parts, size_type num_ranges, - int num_parts, int& num_empty_parts, - LocalIndexType* ranks, LocalIndexType* sizes) -{ - Array range_sizes{exec, num_ranges}; - // num_parts sentinel at the end - Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; - // set sizes to 0 in case of empty parts - components::fill_array(exec, sizes, num_parts, LocalIndexType{}); - - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, - auto range_offsets, auto range_parts, auto range_sizes, - auto tmp_part_ids, auto permutation) { - if (i == 0) { - // set sentinel value at the end - tmp_part_ids[num_ranges] = num_parts; - } - range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; - tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); - }, - num_ranges, num_ranges, num_parts, range_offsets, range_parts, - range_sizes, tmp_part_ids, permutation); - - // group sizes by part ID - // TODO oneDPL has stable_sort and views::zip - // compute inclusive prefix sum for each part - // TODO compute "row_ptrs" for tmp_part_ids - // TODO compute prefix_sum over range_sizes - // TODO compute adjacent differences, set -part_size at part boundaries - // TODO compute prefix_sum again - // write back the results - // TODO this needs to be adapted to the output of the algorithm above - // TODO count number of zeros in size and store in num_empty_parts - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, - auto orig_idxs, auto ranks, auto sizes) { - auto prev_part = - i > 0 ? grouped_part_ids[i - 1] : comm_index_type{-1}; - auto cur_part = grouped_part_ids[i]; - auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel - if (cur_part != next_part) { - sizes[cur_part] = grouped_range_ranks[i]; - } - // write result shifted by one entry to get exclusive prefix sum - ranks[orig_idxs[i]] = prev_part == cur_part - ? grouped_range_ranks[i - 1] - : LocalIndexType{}; - }, - num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); -} + const comm_index_type* range_parts, + size_type num_ranges, comm_index_type num_parts, + comm_index_type& num_empty_parts, + LocalIndexType* starting_indices, + LocalIndexType* part_sizes) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp index a2bd3a43406..c4d0044dc33 100644 --- a/hip/distributed/partition_kernels.hip.cpp +++ b/hip/distributed/partition_kernels.hip.cpp @@ -41,8 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
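
The HIP hunk below mirrors the CUDA one: both backends now delegate to the shared Thrust-based kernel added earlier in common/cuda_hip/distributed/partition_kernels.hpp.inc, which groups range sizes by part ID with a stable sort, sums them per part with a keyed inclusive scan, and scatters the results back through the saved permutation. A host-side sketch of the same idea using only standard algorithms, on the five-range example from partition.hpp (illustrative only, not the actual device code path):

```
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // ranges [0,3), [3,6), [6,8), [8,10), [10,13) owned by parts 0,1,0,2,0
    std::vector<long> offsets{0, 3, 6, 8, 10, 13};
    std::vector<int> part_ids{0, 1, 0, 2, 0};
    auto num_ranges = part_ids.size();

    std::vector<int> range_sizes(num_ranges);
    std::vector<int> permutation(num_ranges);
    for (std::size_t i = 0; i < num_ranges; ++i) {
        range_sizes[i] = static_cast<int>(offsets[i + 1] - offsets[i]);
        permutation[i] = static_cast<int>(i);
    }
    // group ranges by part ID (stable sort keeps the original order per part)
    std::stable_sort(permutation.begin(), permutation.end(),
                     [&](int a, int b) { return part_ids[a] < part_ids[b]; });
    // the starting index of a range is the sum of the sizes of the preceding
    // ranges of the same part, i.e. an exclusive prefix sum per part
    std::vector<int> starting_indices(num_ranges);
    int running = 0;
    for (std::size_t i = 0; i < num_ranges; ++i) {
        auto range = permutation[i];
        bool new_part =
            i == 0 || part_ids[permutation[i - 1]] != part_ids[range];
        running = new_part ? 0 : running;
        starting_indices[range] = running;
        running += range_sizes[range];
    }
    for (auto idx : starting_indices) std::cout << idx << ' ';
    std::cout << '\n';  // prints: 0 0 3 0 5, matching the partition.hpp example
}
```
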
#include "common/unified/base/kernel_launch.hpp" -#include "core/components/fill_array.hpp" -#include "core/components/prefix_sum.hpp" +#include "core/components/fill_array_kernels.hpp" namespace gko { @@ -51,72 +50,7 @@ namespace hip { namespace partition { -template -void build_starting_indices(std::shared_ptr exec, - const GlobalIndexType* range_offsets, - const int* range_parts, size_type num_ranges, - int num_parts, int& num_empty_parts, - LocalIndexType* ranks, LocalIndexType* sizes) -{ - Array range_sizes{exec, num_ranges}; - // num_parts sentinel at the end - Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; - // set sizes to 0 in case of empty parts - components::fill_array(exec, sizes, num_parts, LocalIndexType{}); - - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, - auto range_offsets, auto range_parts, auto range_sizes, - auto tmp_part_ids, auto permutation) { - if (i == 0) { - // set sentinel value at the end - tmp_part_ids[num_ranges] = num_parts; - } - range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; - tmp_part_ids[i] = range_parts[i]; - permutation[i] = static_cast(i); - }, - num_ranges, num_ranges, num_parts, range_offsets, range_parts, - range_sizes, tmp_part_ids, permutation); - - auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); - auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); - auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); - auto value_it = thrust::make_zip_iterator( - thrust::make_tuple(range_sizes_ptr, permutation_ptr)); - // group sizes by part ID - thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, value_it); - // compute inclusive prefix sum for each part - thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, range_sizes_ptr, - range_sizes_ptr); - // write back the results - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto grouped_range_ranks, auto grouped_part_ids, - auto orig_idxs, auto ranks, auto sizes) { - auto prev_part = - i > 0 ? grouped_part_ids[i - 1] : comm_index_type{-1}; - auto cur_part = grouped_part_ids[i]; - auto next_part = grouped_part_ids[i + 1]; // safe due to sentinel - if (cur_part != next_part) { - sizes[cur_part] = grouped_range_ranks[i]; - } - // write result shifted by one entry to get exclusive prefix sum - ranks[orig_idxs[i]] = prev_part == cur_part - ? grouped_range_ranks[i - 1] - : LocalIndexType{}; - }, - num_ranges, range_sizes, tmp_part_ids, permutation, ranks, sizes); - num_empty_parts = - thrust::count(thrust::device, sizes, sizes + num_parts, 0); -} - -GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +#include "common/cuda_hip/distributed/partition_kernels.hpp.inc" } // namespace partition diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 20874f05ee5..4a46c3924c0 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -703,7 +703,7 @@ namespace distributed { /** - * Index type for enumerating processors + * Index type for enumerating processes in a distributed application * * Conforms to the MPI C interface of e.g. 
MPI rank or size */ @@ -722,7 +722,7 @@ using comm_index_type = int; #define GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ template _macro(int32, int32); \ template _macro(int32, int64); \ - template _macro(int64, int64); + template _macro(int64, int64) } // namespace distributed diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 9d603161ffe..496da0be91b 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -54,37 +54,45 @@ namespace distributed { * For example, consider the interval [0, 13) that is partitioned into the * following ranges: * ``` - * [0,3), [3, 7), [7, 8), [8, 10), [10, 13). + * [0,3), [3, 6), [6, 8), [8, 10), [10, 13). * ``` * These ranges are distributed on three part with: * ``` - * p_0 = [0, 3) + [7, 8) + [10, 13), - * p_1 = [3, 7), + * p_0 = [0, 3) + [6, 8) + [10, 13), + * p_1 = [3, 6), * p_2 = [8, 10). * ``` * The part ids can be queried from the @ref get_part_ids array, and the ranges * are represented as offsets, accessed by @ref get_range_bounds, leading to the - * array: + * offset array: * ``` - * r = [0, 3, 7, 8, 10, 13] + * r = [0, 3, 6, 8, 10, 13] * ``` * so that individual ranges are given by `[r[i], r[i + 1])`. * Since each part may be associated with multiple ranges, it is possible to get * the starting index for each range that is local to the owning part, see @ref - * get_range_starting_indices. For the partition above that means + * get_range_starting_indices. These indices can be used to easily iterate over + * part local data. For example, the above partition has the following starting + * indices * ``` * starting_index[0] = 0, * starting_index[1] = 0, * starting_index[2] = 3, // second range of part 1 * starting_index[3] = 0, - * starting_index[4] = 4, // third range of part 1 + * starting_index[4] = 5, // third range of part 1 * ``` + * which you can use to iterate only over the the second range of part 1 (the + * third global range) with + * ``` + * for(int i = 0; i < r[3] - r[2]; ++i){ + * data[starting_index[2] + i] = val; + * } * * @tparam LocalIndexType The index type used for part-local indices. * To prevent overflows, no single part's size may * exceed this index type's maximum value. * @tparam GlobalIndexType The index type used for the global indices. Needs - * to be a larger type than LocalIndexType. + * to be at least as large a type as LocalIndexType. */ template class Partition @@ -109,16 +117,16 @@ class Partition /** * Returns the total number of elements represented by this partition. + * + * @return number elements. */ - size_type get_size() const - { - return offsets_.get_executor()->copy_val_to_host( - offsets_.get_const_data() + get_num_ranges()); - } + size_type get_size() const { return size_; } /** * Returns the number of ranges stored by this partition. * This size refers to the data returned by get_range_bounds(). + * + * @return number of ranges. */ size_type get_num_ranges() const noexcept { @@ -127,11 +135,15 @@ class Partition /** * Returns the number of parts represented in this partition. + * + * @return number of parts. */ comm_index_type get_num_parts() const noexcept { return num_parts_; } /** * Returns the number of empty parts within this partition. + * + * @return number of empty parts. */ comm_index_type get_num_empty_parts() const noexcept { @@ -142,6 +154,8 @@ class Partition * Returns the ranges boundary array stored by this partition. 
* `range_bounds[i]` is the beginning (inclusive) and * `range_bounds[i + 1]` is the end (exclusive) of the ith range. + * + * @return range boundaries array. */ const global_index_type* get_range_bounds() const noexcept { @@ -149,9 +163,11 @@ class Partition } /** - * Returns the part ID array stored by this partition. + * Returns the part IDs of the ranges in this partition. * For each range from get_range_bounds(), it stores the part ID in the - * range [0, get_num_parts() - 1]. + * interval [0, get_num_parts() - 1]. + * + * @return part ID array. */ const comm_index_type* get_part_ids() const noexcept { @@ -167,7 +183,9 @@ class Partition * p_2 = [4-7). * ``` * Then `range_starting_indices[0] = 0`, `range_starting_indices[1] = 0`, - * `range_starting_indices[2] = 5`. + * `range_starting_indices[2] = 4`. + * + * @return part-local starting index array. */ const local_index_type* get_range_starting_indices() const noexcept { @@ -176,7 +194,9 @@ class Partition /** * Returns the part size array. - * part_sizes[p] stores the number of elements in part `p`. + * part_sizes[p] stores the total number of indices in part `p`. + * + * @return part size array. */ const local_index_type* get_part_sizes() const noexcept { @@ -184,8 +204,12 @@ class Partition } /** - * Returns the part size array. - * part_sizes[p] stores the number of elements in part `p`. + * Returns the size of a part given by its part ID. + * @warning Triggers a copy from device to host. + * + * @param part the part ID. + * + * @return size of part. */ local_index_type get_part_size(comm_index_type part) const { @@ -195,6 +219,8 @@ class Partition /** * Checks if each part has no more than one contiguous range. + * + * @return true if each part has no more than one contiguous range. */ bool has_connected_parts(); @@ -202,10 +228,11 @@ class Partition * Checks if the ranges are ordered by their part index. * * Implies that the partition is connected. + * + * @return true if the ranges are ordered by their part index. */ bool has_ordered_parts(); - /** * Builds a partition from a given mapping global_index -> part_id. * @@ -225,6 +252,7 @@ class Partition * @param exec the Executor on which the partition should be built * @param ranges the boundaries of the ranges representing each part. Part i contains the indices [ranges[i], ranges[i + 1]). + Has to contain at least one element. * @return a Partition representing the given contiguous partitioning. */ @@ -257,6 +285,7 @@ class Partition : EnablePolymorphicObject{exec}, num_parts_{num_parts}, num_empty_parts_{0}, + size_{0}, offsets_{exec, num_ranges + 1}, starting_indices_{exec, num_ranges}, part_sizes_{exec, static_cast(num_parts)}, @@ -269,13 +298,15 @@ class Partition } /** - * Compute the range_starting_indices and part_sizes based on the current - * range_bounds and part_ids. + * Finalizes the construction in the create_* methods, by computing the + * range_starting_indices_ and part_sizes_ based on the current + * range_bounds_ and part_ids_, and setting size_ correctly. 
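+     * Caching size_ here also lets get_size() return it without a
+     * device-to-host copy on every call.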
*/ - void compute_range_starting_indices(); + void finalize_construction(); comm_index_type num_parts_; comm_index_type num_empty_parts_; + global_index_type size_; Array offsets_; Array starting_indices_; Array part_sizes_; diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index 171e7ffc95f..d237d9783cd 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -36,6 +36,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include "core/base/allocator.hpp" @@ -54,7 +57,8 @@ void build_starting_indices(std::shared_ptr exec, { std::fill_n(sizes, num_parts, 0); auto num_threads = static_cast(omp_get_max_threads()); - auto size_per_thread = (num_ranges + num_threads - 1) / num_threads; + auto size_per_thread = + static_cast(ceildiv(num_ranges, num_threads)); vector local_sizes(num_parts * num_threads, 0, {exec}); #pragma omp parallel { diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 909dcf914a4..a08aa7b53bf 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -72,6 +72,7 @@ class Partition : public ::testing::Test { typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; using global_index_type = typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + Partition() : ref(gko::ReferenceExecutor::create()) {} std::shared_ptr ref; @@ -157,6 +158,25 @@ TYPED_TEST(Partition, BuildsFromRanges) assert_equal_data(partition->get_part_sizes(), {5, 0, 2, 2, 1}); } + +TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::Array ranges{this->ref, {0}}; + + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + + EXPECT_EQ(partition->get_size(), 0); + EXPECT_EQ(partition->get_num_ranges(), 0); + EXPECT_EQ(partition->get_num_parts(), 0); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), {0}); +} + + TYPED_TEST(Partition, BuildsFromGlobalSize) { using local_index_type = typename TestFixture::local_index_type; @@ -256,6 +276,7 @@ TYPED_TEST(Partition, IsConnectedUnordered) gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); ASSERT_TRUE(part->has_connected_parts()); + ASSERT_FALSE(part->has_ordered_parts()); } @@ -281,13 +302,13 @@ TYPED_TEST(Partition, IsOrdered) gko::distributed::Partition:: build_from_mapping( this->ref, - gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); + gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3)); - ASSERT_FALSE(part->has_ordered_parts()); + ASSERT_TRUE(part->has_ordered_parts()); } -TYPED_TEST(Partition, IsOrderedFail) +TYPED_TEST(Partition, IsOrderedWithEmptyParts) { using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; @@ -295,10 +316,24 @@ TYPED_TEST(Partition, IsOrderedFail) gko::distributed::Partition:: build_from_mapping( this->ref, - gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3)); + gko::Array{this->ref, {0, 2, 2, 5, 5}}, 6)); ASSERT_TRUE(part->has_ordered_parts()); } +TYPED_TEST(Partition, IsOrderedFail) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + auto part = 
gko::share( + gko::distributed::Partition:: + build_from_mapping( + this->ref, + gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); + + ASSERT_FALSE(part->has_ordered_parts()); +} + + } // namespace diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 7f0f104fb50..7a1546394e5 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -63,6 +63,7 @@ class Partition : public ::testing::Test { typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; using global_index_type = typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + Partition() : rand_engine(96457) {} void SetUp() @@ -133,10 +134,8 @@ TYPED_TEST(Partition, BuildsFromMapping) using global_index_type = typename TestFixture::global_index_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; - gko::Array mapping{this->ref, 10000}; - for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { - mapping.get_data()[i] = part_dist(this->rand_engine); - } + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; auto part = gko::distributed::Partition< @@ -159,10 +158,8 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) comm_index_type num_parts = 7; // skip part 0 std::uniform_int_distribution part_dist{1, num_parts - 1}; - gko::Array mapping{this->ref, 10000}; - for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { - mapping.get_data()[i] = part_dist(this->rand_engine); - } + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; auto part = gko::distributed::Partition< @@ -185,10 +182,8 @@ TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) comm_index_type num_parts = 7; // return only part 1 std::uniform_int_distribution part_dist{1, 1}; - gko::Array mapping{this->ref, 10000}; - for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { - mapping.get_data()[i] = part_dist(this->rand_engine); - } + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; auto part = gko::distributed::Partition< @@ -285,7 +280,7 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) } -TYPED_TEST(Partition, BuildsFromContiguousWithSomeMostlyEmptyParts) +TYPED_TEST(Partition, BuildsFromContiguousWithMostlyEmptyParts) { using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; @@ -340,6 +335,24 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) } +TYPED_TEST(Partition, BuildsFromContiguousWithSingleEntry) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::Array ranges{this->ref, {0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->ref, + ranges); + auto dpart = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_contiguous(this->exec, + dranges); + + this->assert_equal(part, dpart); +} + + TYPED_TEST(Partition, BuildsFromGlobalSize) { using local_index_type = typename TestFixture::local_index_type; @@ -443,10 +456,8 @@ TYPED_TEST(Partition, IsOrderedRandom) using global_index_type = 
typename TestFixture::global_index_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; - gko::Array mapping{this->ref, 10000}; - for (gko::size_type i = 0; i < mapping.get_num_elems(); i++) { - mapping.get_data()[i] = part_dist(this->rand_engine); - } + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); auto part = gko::distributed::Partition< local_index_type, global_index_type>::build_from_mapping(this->ref, mapping, From 42b25bcf9566a0f1a9948097244f7e15d1adc198 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 25 Nov 2021 11:34:12 +0100 Subject: [PATCH 57/59] disables dpcpp for partition common test --- cmake/create_test.cmake | 8 ++++++-- test/distributed/CMakeLists.txt | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index de5fd2bbcda..32162604869 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -163,6 +163,7 @@ ginkgo_build_test_name(${test_name} test_target_name) endfunction(ginkgo_create_hip_test) function(ginkgo_create_common_test test_name) + cmake_parse_arguments(PARSE_ARGV 1 common_test "" "" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES") set(executors) if(GINKGO_BUILD_OMP) list(APPEND executors omp) @@ -176,6 +177,9 @@ function(ginkgo_create_common_test test_name) if(GINKGO_BUILD_DPCPP) list(APPEND executors dpcpp) endif() + foreach(disabled_exec ${common_test_DISABLE_EXECUTORS}) + list(REMOVE_ITEM executors ${disabled_exec}) + endforeach() foreach(exec ${executors}) ginkgo_build_test_name(${test_name} test_target_name) # build executor typename out of shorthand @@ -189,7 +193,7 @@ function(ginkgo_create_common_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec}) - target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES}) # use float for DPC++ if necessary if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) @@ -209,4 +213,4 @@ function(ginkgo_create_common_and_reference_test test_name) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) ginkgo_set_test_target_properties(${test_name}_reference ${test_target_name}) -endfunction() \ No newline at end of file +endfunction() diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt index 6c9305372cc..b4e2fbff054 100644 --- a/test/distributed/CMakeLists.txt +++ b/test/distributed/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_common_test(partition_kernels) +ginkgo_create_common_test(partition_kernels DISABLE_EXECUTORS dpcpp) From 810d86c31e054c322add1d312772c337599f81c8 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 25 Nov 2021 12:42:56 +0100 Subject: [PATCH 58/59] fix kernel launch issues for certain cuda versions --- .../distributed/partition_kernels.hpp.inc | 62 ++++++++++--------- cuda/base/kernel_launch.cuh | 26 ++++---- cuda/base/kernel_launch_solver.cuh | 14 +++-- hip/base/kernel_launch.hip.hpp | 28 +++++---- hip/base/kernel_launch_solver.hip.hpp | 18 +++--- 5 files changed, 84 insertions(+), 64 deletions(-) diff --git 
a/common/cuda_hip/distributed/partition_kernels.hpp.inc b/common/cuda_hip/distributed/partition_kernels.hpp.inc index 49e08dadfc5..d66fa06321b 100644 --- a/common/cuda_hip/distributed/partition_kernels.hpp.inc +++ b/common/cuda_hip/distributed/partition_kernels.hpp.inc @@ -43,36 +43,42 @@ void build_starting_indices(std::shared_ptr exec, LocalIndexType* starting_indices, LocalIndexType* part_sizes) { - Array range_sizes{exec, num_ranges}; - // num_parts sentinel at the end - Array tmp_part_ids{exec, num_ranges + 1}; - Array permutation{exec, num_ranges}; - // set part_sizes to 0 in case of empty parts - components::fill_array(exec, part_sizes, num_parts, LocalIndexType{}); + if (num_ranges > 0) { + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set part_sizes to 0 in case of empty parts + components::fill_array(exec, part_sizes, num_parts, LocalIndexType{}); + kernel::setup_sizes_ids_permutation( + exec, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); - kernel::setup_sizes_ids_permutation(exec, num_ranges, num_parts, - range_offsets, range_parts, range_sizes, - tmp_part_ids, permutation); - - auto tmp_part_id_ptr = thrust::device_pointer_cast(tmp_part_ids.get_data()); - auto range_sizes_ptr = thrust::device_pointer_cast(range_sizes.get_data()); - auto permutation_ptr = thrust::device_pointer_cast(permutation.get_data()); - auto value_it = thrust::make_zip_iterator( - thrust::make_tuple(range_sizes_ptr, permutation_ptr)); - // group range_sizes by part ID - thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, value_it); - // compute inclusive prefix sum for each part - thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, - tmp_part_id_ptr + num_ranges, range_sizes_ptr, - range_sizes_ptr); - // write back the results - kernel::compute_part_sizes_and_starting_indices( - exec, num_ranges, range_sizes, tmp_part_ids, permutation, - starting_indices, part_sizes); - num_empty_parts = - thrust::count(thrust::device, part_sizes, part_sizes + num_parts, 0); + auto tmp_part_id_ptr = + thrust::device_pointer_cast(tmp_part_ids.get_data()); + auto range_sizes_ptr = + thrust::device_pointer_cast(range_sizes.get_data()); + auto permutation_ptr = + thrust::device_pointer_cast(permutation.get_data()); + auto value_it = thrust::make_zip_iterator( + thrust::make_tuple(range_sizes_ptr, permutation_ptr)); + // group range_sizes by part ID + thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, value_it); + // compute inclusive prefix sum for each part + thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, + range_sizes_ptr, range_sizes_ptr); + // write back the results + kernel::compute_part_sizes_and_starting_indices( + exec, num_ranges, range_sizes, tmp_part_ids, permutation, + starting_indices, part_sizes); + num_empty_parts = thrust::count(thrust::device, part_sizes, + part_sizes + num_parts, 0); + } else { + num_empty_parts = num_parts; + } } GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/cuda/base/kernel_launch.cuh b/cuda/base/kernel_launch.cuh index 5179a5cc27d..7166a817e13 100644 --- a/cuda/base/kernel_launch.cuh +++ b/cuda/base/kernel_launch.cuh @@ -79,23 +79,27 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... 
args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size, block_size); - generic_kernel_1d<<>>(static_cast(size), fn, - map_to_device(args)...); + if (size > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size, block_size); + generic_kernel_1d<<>>( + static_cast(size), fn, map_to_device(args)...); + } } template void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - generic_kernel_2d<<>>(static_cast(size[0]), - static_cast(size[1]), - fn, map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + generic_kernel_2d<<>>( + static_cast(size[0]), static_cast(size[1]), fn, + map_to_device(args)...); + } } diff --git a/cuda/base/kernel_launch_solver.cuh b/cuda/base/kernel_launch_solver.cuh index f4da60ddede..16f0540cff7 100644 --- a/cuda/base/kernel_launch_solver.cuh +++ b/cuda/base/kernel_launch_solver.cuh @@ -62,12 +62,14 @@ void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, KernelArgs&&... args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - generic_kernel_2d_solver<<>>( - static_cast(size[0]), static_cast(size[1]), - static_cast(default_stride), fn, map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + generic_kernel_2d_solver<<>>( + static_cast(size[0]), static_cast(size[1]), + static_cast(default_stride), fn, map_to_device(args)...); + } } diff --git a/hip/base/kernel_launch.hip.hpp b/hip/base/kernel_launch.hip.hpp index 6c627838fea..7831b5925a2 100644 --- a/hip/base/kernel_launch.hip.hpp +++ b/hip/base/kernel_launch.hip.hpp @@ -82,23 +82,29 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... args) { - gko::hip::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size, block_size); - hipLaunchKernelGGL(generic_kernel_1d, num_blocks, block_size, 0, 0, - static_cast(size), fn, map_to_device(args)...); + if (size > 0) { + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size, block_size); + hipLaunchKernelGGL(generic_kernel_1d, num_blocks, block_size, 0, 0, + static_cast(size), fn, + map_to_device(args)...); + } } template void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... 
args)
 {
-    gko::hip::device_guard guard{exec->get_device_id()};
-    constexpr auto block_size = default_block_size;
-    auto num_blocks = ceildiv(size[0] * size[1], block_size);
-    hipLaunchKernelGGL(generic_kernel_2d, num_blocks, block_size, 0, 0,
-                       static_cast(size[0]), static_cast(size[1]),
-                       fn, map_to_device(args)...);
+    if (size[0] * size[1] > 0) {
+        gko::hip::device_guard guard{exec->get_device_id()};
+        constexpr auto block_size = default_block_size;
+        auto num_blocks = ceildiv(size[0] * size[1], block_size);
+        hipLaunchKernelGGL(generic_kernel_2d, num_blocks, block_size, 0, 0,
+                           static_cast(size[0]),
+                           static_cast(size[1]), fn,
+                           map_to_device(args)...);
+    }
 }
diff --git a/hip/base/kernel_launch_solver.hip.hpp b/hip/base/kernel_launch_solver.hip.hpp
index 9798f6c4fbc..46abe82c415 100644
--- a/hip/base/kernel_launch_solver.hip.hpp
+++ b/hip/base/kernel_launch_solver.hip.hpp
@@ -65,14 +65,16 @@ void run_kernel_solver(std::shared_ptr exec,
                        KernelFunction fn, dim<2> size,
                        size_type default_stride, KernelArgs&&... args)
 {
-    gko::hip::device_guard guard{exec->get_device_id()};
-    constexpr auto block_size = kernels::hip::default_block_size;
-    auto num_blocks = ceildiv(size[0] * size[1], block_size);
-    hipLaunchKernelGGL(kernels::hip::generic_kernel_2d_solver, num_blocks,
-                       block_size, 0, 0, static_cast(size[0]),
-                       static_cast(size[1]),
-                       static_cast(default_stride), fn,
-                       kernels::hip::map_to_device(args)...);
+    if (size[0] * size[1] > 0) {
+        gko::hip::device_guard guard{exec->get_device_id()};
+        constexpr auto block_size = kernels::hip::default_block_size;
+        auto num_blocks = ceildiv(size[0] * size[1], block_size);
+        hipLaunchKernelGGL(kernels::hip::generic_kernel_2d_solver, num_blocks,
+                           block_size, 0, 0, static_cast(size[0]),
+                           static_cast(size[1]),
+                           static_cast(default_stride), fn,
+                           kernels::hip::map_to_device(args)...);
+    }
 }

From 5bb5436919c279fad6fbd4d9113b301180446027 Mon Sep 17 00:00:00 2001
From: Marcel Koch
Date: Mon, 29 Nov 2021 09:32:03 +0100
Subject: [PATCH 59/59] review updates

- merge /common/unified/.../partition_kernels.hpp.inc into
  /common/cuda_hip/.../partition_kernels.hpp.inc
- documentation

Co-authored-by: Yu-Hsiang Tsai
---
 .../distributed/partition_kernels.hpp.inc     |  61 +++++-
 .../distributed/partition_kernels.hpp.inc     |  92 --------
 dpcpp/distributed/partition_kernels.dp.cpp    |   2 +
 include/ginkgo/core/distributed/partition.hpp |   7 +-
 .../test/distributed/partition_kernels.cpp    | 122 ++++-------
 test/distributed/partition_kernels.cpp        | 199 ++++++------------
 6 files changed, 164 insertions(+), 319 deletions(-)
 delete mode 100644 common/unified/distributed/partition_kernels.hpp.inc

diff --git a/common/cuda_hip/distributed/partition_kernels.hpp.inc b/common/cuda_hip/distributed/partition_kernels.hpp.inc
index d66fa06321b..e8c0359533f 100644
--- a/common/cuda_hip/distributed/partition_kernels.hpp.inc
+++ b/common/cuda_hip/distributed/partition_kernels.hpp.inc
@@ -31,7 +31,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*************************************************************/ -#include "common/unified/distributed/partition_kernels.hpp.inc" +namespace kernel { + + +template +void setup_sizes_ids_permutation( + std::shared_ptr exec, size_type num_ranges, + comm_index_type num_parts, const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, Array& range_sizes, + Array& part_ids, Array& permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes.get_data(), part_ids.get_data(), permutation.get_data()); +} + + +template +void compute_part_sizes_and_starting_indices( + std::shared_ptr exec, size_type num_ranges, + const Array& range_sizes, + const Array& part_ids, + const Array& permutation, LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_starting_indices, + auto grouped_part_ids, auto orig_idxs, + auto starting_indices, auto part_sizes) { + auto prev_part = i > 0 ? grouped_part_ids[i - 1] + : invalid_index(); + auto cur_part = grouped_part_ids[i]; + auto next_part = + grouped_part_ids[i + 1]; // last element has to be num_parts + if (cur_part != next_part) { + part_sizes[cur_part] = grouped_starting_indices[i]; + } + // write result shifted by one entry to get exclusive prefix sum + starting_indices[orig_idxs[i]] = + prev_part == cur_part ? grouped_starting_indices[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(), + permutation.get_const_data(), starting_indices, part_sizes); +} + + +} // namespace kernel template diff --git a/common/unified/distributed/partition_kernels.hpp.inc b/common/unified/distributed/partition_kernels.hpp.inc deleted file mode 100644 index a702334945f..00000000000 --- a/common/unified/distributed/partition_kernels.hpp.inc +++ /dev/null @@ -1,92 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*************************************************************/
-
-namespace kernel {
-
-
-template
-void setup_sizes_ids_permutation(
-    std::shared_ptr exec, size_type num_ranges,
-    comm_index_type num_parts, const GlobalIndexType* range_offsets,
-    const comm_index_type* range_parts, Array& range_sizes,
-    Array& part_ids, Array& permutation)
-{
-    run_kernel(
-        exec,
-        [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts,
-                      auto range_offsets, auto range_parts, auto range_sizes,
-                      auto part_ids, auto permutation) {
-            if (i == 0) {
-                // set sentinel value at the end
-                part_ids[num_ranges] = num_parts;
-            }
-            range_sizes[i] = range_offsets[i + 1] - range_offsets[i];
-            part_ids[i] = range_parts[i];
-            permutation[i] = static_cast(i);
-        },
-        num_ranges, num_ranges, num_parts, range_offsets, range_parts,
-        range_sizes.get_data(), part_ids.get_data(), permutation.get_data());
-}
-
-
-template
-void compute_part_sizes_and_starting_indices(
-    std::shared_ptr exec, size_type num_ranges,
-    const Array& range_sizes,
-    const Array& part_ids,
-    const Array& permutation, LocalIndexType* starting_indices,
-    LocalIndexType* part_sizes)
-{
-    run_kernel(
-        exec,
-        [] GKO_KERNEL(auto i, auto grouped_starting_indices,
-                      auto grouped_part_ids, auto orig_idxs,
-                      auto starting_indices, auto part_sizes) {
-            auto prev_part = i > 0 ? grouped_part_ids[i - 1]
-                                   : invalid_index();
-            auto cur_part = grouped_part_ids[i];
-            auto next_part =
-                grouped_part_ids[i + 1];  // last element has to be num_parts
-            if (cur_part != next_part) {
-                part_sizes[cur_part] = grouped_starting_indices[i];
-            }
-            // write result shifted by one entry to get exclusive prefix sum
-            starting_indices[orig_idxs[i]] =
-                prev_part == cur_part ? grouped_starting_indices[i - 1]
-                                      : LocalIndexType{};
-        },
-        num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(),
-        permutation.get_const_data(), starting_indices, part_sizes);
-}
-
-
-} // namespace kernel
diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp
index 65f491c10df..c52c31f5241 100644
--- a/dpcpp/distributed/partition_kernels.dp.cpp
+++ b/dpcpp/distributed/partition_kernels.dp.cpp
@@ -39,6 +39,8 @@ namespace dpcpp {
 namespace partition {


+// TODO: wait until https://github.com/oneapi-src/oneDPL/pull/388 is released
+// to implement it similarly to cuda/hip
 template
 void build_starting_indices(std::shared_ptr exec,
                             const GlobalIndexType* range_offsets,
diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp
index 496da0be91b..36faf35bf37 100644
--- a/include/ginkgo/core/distributed/partition.hpp
+++ b/include/ginkgo/core/distributed/partition.hpp
@@ -251,9 +251,10 @@ class Partition
      *
      * @param exec  the Executor on which the partition should be built
      * @param ranges  the boundaries of the ranges representing each part.
-                      Part i contains the indices [ranges[i], ranges[i + 1]).
-                      Has to contain at least one element.
-
+     *                Part i contains the indices [ranges[i], ranges[i + 1]).
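+     *                For example, ranges = {0, 4, 7} describes two parts:
+     *                part 0 = [0, 4) and part 1 = [4, 7).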
+ * Has to contain at least one element. + * The first element has to be 0. + * * @return a Partition representing the given contiguous partitioning. */ static std::unique_ptr build_from_contiguous( diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index a08aa7b53bf..521736d4b42 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -72,6 +72,8 @@ class Partition : public ::testing::Test { typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; using global_index_type = typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; Partition() : ref(gko::ReferenceExecutor::create()) {} @@ -83,17 +85,14 @@ TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); TYPED_TEST(Partition, BuildsFromMapping) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array mapping{ this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; comm_index_type num_parts = 3; gko::size_type num_ranges = 10; - auto partition = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); + auto partition = + part_type::build_from_mapping(this->ref, mapping, num_parts); EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); @@ -111,17 +110,14 @@ TYPED_TEST(Partition, BuildsFromMapping) TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array mapping{ this->ref, {3, 3, 0, 1, 1, 3, 0, 0, 1, 0, 1, 1, 1, 3, 3, 0}}; comm_index_type num_parts = 5; gko::size_type num_ranges = 10; - auto partition = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); + auto partition = + part_type::build_from_mapping(this->ref, mapping, num_parts); EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); EXPECT_EQ(partition->get_num_ranges(), num_ranges); @@ -139,13 +135,11 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) TYPED_TEST(Partition, BuildsFromRanges) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; - auto partition = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); + auto partition = part_type::build_from_contiguous(this->ref, ranges); EXPECT_EQ(partition->get_size(), ranges.get_data()[ranges.get_num_elems() - 1]); @@ -161,13 +155,11 @@ TYPED_TEST(Partition, BuildsFromRanges) TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0}}; - auto partition = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); + auto partition = 
part_type::build_from_contiguous(this->ref, ranges); EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 0); @@ -179,12 +171,10 @@ TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) TYPED_TEST(Partition, BuildsFromGlobalSize) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; auto partition = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, 5, 13); + part_type::build_from_global_size_uniform(this->ref, 5, 13); EXPECT_EQ(partition->get_size(), 13); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -199,12 +189,9 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; - auto partition = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, 5, 0); + auto partition = part_type::build_from_global_size_uniform(this->ref, 5, 0); EXPECT_EQ(partition->get_size(), 0); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -219,12 +206,9 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; - auto partition = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, 5, 3); + auto partition = part_type::build_from_global_size_uniform(this->ref, 5, 3); EXPECT_EQ(partition->get_size(), 3); EXPECT_EQ(partition->get_num_ranges(), 5); @@ -239,13 +223,9 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) TYPED_TEST(Partition, IsConnected) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {0, 0, 1, 1, 2}}, 3)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}}, 3); ASSERT_TRUE(part->has_connected_parts()); } @@ -253,13 +233,9 @@ TYPED_TEST(Partition, IsConnected) TYPED_TEST(Partition, IsConnectedWithEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {0, 0, 2, 2, 5}}, 6)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 0, 2, 2, 5}}, 6); ASSERT_TRUE(part->has_connected_parts()); } @@ -267,13 +243,9 @@ TYPED_TEST(Partition, IsConnectedWithEmptyParts) TYPED_TEST(Partition, IsConnectedUnordered) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {1, 1, 0, 
0, 2}}, 3); ASSERT_TRUE(part->has_connected_parts()); ASSERT_FALSE(part->has_ordered_parts()); @@ -282,13 +254,9 @@ TYPED_TEST(Partition, IsConnectedUnordered) TYPED_TEST(Partition, IsConnectedFail) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {0, 1, 2, 0, 1}}, 3)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}}, 3); ASSERT_FALSE(part->has_connected_parts()); } @@ -296,13 +264,9 @@ TYPED_TEST(Partition, IsConnectedFail) TYPED_TEST(Partition, IsOrdered) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3); ASSERT_TRUE(part->has_ordered_parts()); } @@ -310,13 +274,9 @@ TYPED_TEST(Partition, IsOrdered) TYPED_TEST(Partition, IsOrderedWithEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {0, 2, 2, 5, 5}}, 6)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 2, 2, 5, 5}}, 6); ASSERT_TRUE(part->has_ordered_parts()); } @@ -324,13 +284,9 @@ TYPED_TEST(Partition, IsOrderedWithEmptyParts) TYPED_TEST(Partition, IsOrderedFail) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; - auto part = gko::share( - gko::distributed::Partition:: - build_from_mapping( - this->ref, - gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3)); + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3); ASSERT_FALSE(part->has_ordered_parts()); } diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 7a1546394e5..bc3187ab1bc 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -63,6 +63,8 @@ class Partition : public ::testing::Test { typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; using global_index_type = typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; Partition() : rand_engine(96457) {} @@ -79,11 +81,8 @@ class Partition : public ::testing::Test { } } - void assert_equal( - std::unique_ptr>& part, - std::unique_ptr>& dpart) + void assert_equal(std::unique_ptr& part, + std::unique_ptr& dpart) { ASSERT_EQ(part->get_size(), dpart->get_size()); ASSERT_EQ(part->get_num_ranges(), dpart->get_num_ranges()); @@ -130,22 +129,15 @@ TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); TYPED_TEST(Partition, BuildsFromMapping) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename 
TestFixture::part_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; auto mapping = gko::test::generate_random_array( 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - dmapping, - num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); this->assert_equal(part, dpart); } @@ -153,8 +145,7 @@ TYPED_TEST(Partition, BuildsFromMapping) TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; // skip part 0 std::uniform_int_distribution part_dist{1, num_parts - 1}; @@ -162,14 +153,8 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - dmapping, - num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); this->assert_equal(part, dpart); } @@ -177,8 +162,7 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; // return only part 1 std::uniform_int_distribution part_dist{1, 1}; @@ -186,14 +170,8 @@ TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) 10000, part_dist, this->rand_engine, this->ref); gko::Array dmapping{this->exec, mapping}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - dmapping, - num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); this->assert_equal(part, dpart); } @@ -201,20 +179,13 @@ TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; gko::Array mapping{this->ref, 0}; gko::Array dmapping{this->exec, 0}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - dmapping, - 
num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); this->assert_equal(part, dpart); } @@ -222,21 +193,14 @@ TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) TYPED_TEST(Partition, BuildsFromMappingWithOnePart) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 1; gko::Array mapping{this->ref, 10000}; mapping.fill(0); gko::Array dmapping{this->exec, mapping}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - dmapping, - num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); this->assert_equal(part, dpart); } @@ -244,18 +208,14 @@ TYPED_TEST(Partition, BuildsFromMappingWithOnePart) TYPED_TEST(Partition, BuildsFromContiguous) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0, 1234, 3134, 4578, 16435, 60000}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -263,18 +223,14 @@ TYPED_TEST(Partition, BuildsFromContiguous) TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{ this->ref, {0, 1234, 3134, 3134, 4578, 16435, 16435, 60000}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -282,18 +238,14 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithMostlyEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{ this->ref, {0, 0, 3134, 4578, 4578, 4578, 4578, 4578}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = 
part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -301,17 +253,13 @@ TYPED_TEST(Partition, BuildsFromContiguousWithMostlyEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0, 0, 0, 0, 0, 0, 0}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -319,17 +267,13 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0, 0}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -337,17 +281,13 @@ TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) TYPED_TEST(Partition, BuildsFromContiguousWithSingleEntry) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; gko::Array ranges{this->ref, {0}}; gko::Array dranges{this->exec, ranges}; - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->ref, - ranges); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_contiguous(this->exec, - dranges); + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); this->assert_equal(part, dpart); } @@ -355,17 +295,15 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSingleEntry) TYPED_TEST(Partition, BuildsFromGlobalSize) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; const int num_parts = 7; const global_index_type global_size = 708; - auto part = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, num_parts, global_size); - auto dpart = - gko::distributed::Partition:: - build_from_global_size_uniform(this->exec, num_parts, global_size); + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -373,17 
+311,15 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; const int num_parts = 7; const global_index_type global_size = 0; - auto part = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, num_parts, global_size); - auto dpart = - gko::distributed::Partition:: - build_from_global_size_uniform(this->exec, num_parts, global_size); + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -391,17 +327,15 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) { - using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; const int num_parts = 77; const global_index_type global_size = 13; - auto part = - gko::distributed::Partition:: - build_from_global_size_uniform(this->ref, num_parts, global_size); - auto dpart = - gko::distributed::Partition:: - build_from_global_size_uniform(this->exec, num_parts, global_size); + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); this->assert_equal(part, dpart); } @@ -409,8 +343,7 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) TYPED_TEST(Partition, IsOrderedTrue) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; gko::size_type size_per_part = 1000; gko::size_type global_size = num_parts * size_per_part; @@ -419,10 +352,7 @@ TYPED_TEST(Partition, IsOrderedTrue) std::fill(mapping.get_data() + i * size_per_part, mapping.get_data() + (i + 1) * size_per_part, i); } - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - mapping, - num_parts); + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); ASSERT_TRUE(dpart->has_ordered_parts()); } @@ -430,8 +360,7 @@ TYPED_TEST(Partition, IsOrderedTrue) TYPED_TEST(Partition, IsOrderedFail) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; gko::size_type size_per_part = 1000; gko::size_type global_size = num_parts * size_per_part; @@ -441,10 +370,7 @@ TYPED_TEST(Partition, IsOrderedFail) mapping.get_data() + (i + 1) * size_per_part, num_parts - 1 - i); } - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - mapping, - num_parts); + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); ASSERT_FALSE(dpart->has_ordered_parts()); } @@ -452,20 +378,13 @@ TYPED_TEST(Partition, IsOrderedFail) TYPED_TEST(Partition, IsOrderedRandom) { - using local_index_type = typename TestFixture::local_index_type; - using global_index_type = typename TestFixture::global_index_type; + 
using part_type = typename TestFixture::part_type; comm_index_type num_parts = 7; std::uniform_int_distribution part_dist{0, num_parts - 1}; auto mapping = gko::test::generate_random_array( 10000, part_dist, this->rand_engine, this->ref); - auto part = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->ref, - mapping, - num_parts); - auto dpart = gko::distributed::Partition< - local_index_type, global_index_type>::build_from_mapping(this->exec, - mapping, - num_parts); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); ASSERT_EQ(part->has_ordered_parts(), dpart->has_ordered_parts()); }
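
For readers trying out this series: below is a minimal usage sketch of the
Partition interface exercised by the patches above. It assumes the type and
its build_from_mapping factory are reachable through the public
<ginkgo/ginkgo.hpp> header, as in the tests; the expected values mirror the
IsConnectedUnordered reference test. This is an illustration, not code taken
from the repository.

#include <ginkgo/ginkgo.hpp>

#include <cassert>


int main()
{
    auto ref = gko::ReferenceExecutor::create();
    // map global indices 0..4 to parts: part 1 owns {0, 1},
    // part 0 owns {2, 3}, part 2 owns {4}
    gko::Array<gko::distributed::comm_index_type> mapping{ref,
                                                          {1, 1, 0, 0, 2}};
    auto part = gko::distributed::Partition<gko::int32, gko::int64>::
        build_from_mapping(ref, mapping, 3);

    assert(part->get_size() == 5);
    assert(part->get_num_parts() == 3);
    // get_part_size triggers a device-to-host copy (a no-op on reference)
    assert(part->get_part_size(0) == 2);
    // each part covers a single contiguous range, but the ranges are not
    // sorted by part ID
    assert(part->has_connected_parts());
    assert(!part->has_ordered_parts());
}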