diff --git a/common/cuda_hip/distributed/vector_kernels.hpp.inc b/common/cuda_hip/distributed/vector_kernels.hpp.inc new file mode 100644 index 00000000000..f99fc1fd28c --- /dev/null +++ b/common/cuda_hip/distributed/vector_kernels.hpp.inc @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, matrix::Dense* local_mtx) +{ + const auto* range_bounds = partition->get_range_bounds(); + const auto* range_starting_indices = + partition->get_range_starting_indices(); + const auto* part_ids = partition->get_part_ids(); + const auto num_ranges = partition->get_num_ranges(); + + Array range_id{exec, input.get_num_elems()}; + thrust::upper_bound(thrust::device, range_bounds + 1, + range_bounds + num_ranges + 1, + input.get_const_row_idxs(), + input.get_const_row_idxs() + input.get_num_elems(), + range_id.get_data(), thrust::less()); + + // write values with local rows into the local matrix at the correct index + // this needs the following iterators: + // - local_row_it: (global_row, range_id) -> local row index + // - flat_idx_it: (local_row, col) -> flat index in local matrix values + // array + // the flat_idx_it is used by the scatter_if as an index map for the values + auto map_to_local_row = + [range_bounds, range_starting_indices] __host__ __device__( + const thrust::tuple& idx_range_id) { + const auto idx = thrust::get<0>(idx_range_id); + const auto rid = thrust::get<1>(idx_range_id); + return static_cast(idx - range_bounds[rid]) + + range_starting_indices[rid]; + }; + auto local_row_it = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(input.get_const_row_idxs(), + range_id.get_data())), + map_to_local_row); + + auto stride = local_mtx->get_stride(); + auto map_to_flat_idx = + [stride] __host__ __device__( + const thrust::tuple& row_col) { + return thrust::get<0>(row_col) * stride + thrust::get<1>(row_col); + }; + auto flat_idx_it = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(local_row_it, input.get_const_col_idxs())), + map_to_flat_idx); + + auto is_local_row = + [part_ids, local_part] __host__ __device__(const size_type rid) { + return part_ids[rid] == local_part; + }; + thrust::scatter_if(thrust::device, input.get_const_values(), + input.get_const_values() + input.get_num_elems(), + flat_idx_it, range_id.get_data(), + local_mtx->get_values(), is_local_row); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu index 46d834ee0ca..def3fc8ec87 100644 --- a/cuda/distributed/vector_kernels.cu +++ b/cuda/distributed/vector_kernels.cu @@ -33,6 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/distributed/vector_kernels.hpp" +#include +#include +#include +#include +#include +#include + + #include @@ -42,16 +50,7 @@ namespace cuda { namespace distributed_vector { -template -void build_local( - std::shared_ptr exec, - const device_matrix_data& input, - const distributed::Partition* partition, - comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); +#include "common/cuda_hip/distributed/vector_kernels.hpp.inc" } // namespace distributed_vector diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp index 1133317e4e4..6cbfa1224e9 100644 --- a/hip/distributed/vector_kernels.hip.cpp +++ b/hip/distributed/vector_kernels.hip.cpp @@ -33,6 +33,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/distributed/vector_kernels.hpp" +#include + + +#include +#include +#include +#include +#include +#include + + #include @@ -42,16 +53,7 @@ namespace hip { namespace distributed_vector { -template -void build_local( - std::shared_ptr exec, - const device_matrix_data& input, - const distributed::Partition* partition, - comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); +#include "common/cuda_hip/distributed/vector_kernels.hpp.inc" } // namespace distributed_vector diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index 2ddf2808922..cf7723a11f1 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,7 +2,6 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) -add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(preconditioner) diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt deleted file mode 100644 index 61e5d60cb39..00000000000 --- a/omp/test/distributed/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -ginkgo_create_test(vector_kernels) diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt index b4e2fbff054..7affdfc066d 100644 --- a/test/distributed/CMakeLists.txt +++ b/test/distributed/CMakeLists.txt @@ -1 +1,2 @@ ginkgo_create_common_test(partition_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(vector_kernels DISABLE_EXECUTORS dpcpp) diff --git a/omp/test/distributed/vector_kernels.cpp b/test/distributed/vector_kernels.cpp similarity index 95% rename from omp/test/distributed/vector_kernels.cpp rename to test/distributed/vector_kernels.cpp index 5ee65cfb24c..93f7f8e10e5 100644 --- a/omp/test/distributed/vector_kernels.cpp +++ b/test/distributed/vector_kernels.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/distributed/vector_kernels.hpp" + + #include #include #include @@ -43,8 +46,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/distributed/vector_kernels.hpp" #include "core/test/utils.hpp" +#include "test/utils/executor.hpp" namespace { @@ -68,11 +71,17 @@ class Vector : public ::testing::Test { using global_entry = gko::matrix_data_entry; using mtx = gko::matrix::Dense; - Vector() - : ref(gko::ReferenceExecutor::create()), - exec(gko::OmpExecutor::create()), - engine(42) - {} + Vector() : ref(gko::ReferenceExecutor::create()), engine(42) + { + init_executor(ref, exec); + } + + void TearDown() + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } void validate( const gko::distributed::Partition* @@ -94,20 +103,23 @@ class Vector : public ::testing::Test { gko::kernels::reference::distributed_vector::build_local( ref, input, partition, part, output.get()); - gko::kernels::omp::distributed_vector::build_local( + gko::kernels::EXEC_NAMESPACE::distributed_vector::build_local( exec, d_input, d_partition, part, d_output.get()); GKO_ASSERT_MTX_NEAR(output, d_output, 0); } } - std::shared_ptr ref; - std::shared_ptr exec; + std::shared_ptr ref; + std::shared_ptr exec; std::default_random_engine engine; }; + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + template - gko::device_matrix_data generate_random_matrix_data_array( gko::size_type num_rows, gko::size_type num_cols, NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, @@ -122,8 +134,6 @@ gko::device_matrix_data generate_random_matrix_data_array( md); } -TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); - TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) { diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 3f8abcc8422..c7eb1305e19 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -114,14 +114,12 @@ class VectorCreation : public ::testing::Test { size{local_size[1] * comm.size(), 11}, md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} - {} - - void SetUp() override { - ASSERT_EQ(this->comm.size(), 3); - init_executor(gko::ReferenceExecutor::create(), exec); + init_executor(gko::ReferenceExecutor::create(), exec, comm); } + void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + void TearDown() override { if (exec != nullptr) { @@ -146,7 +144,7 @@ class VectorCreation : public ::testing::Test { TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); -#ifdef GKO_COMPILING_REFERENCE +#ifndef GKO_COMPILING_DPCPP TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) @@ -373,9 +371,9 @@ class VectorReductions : public ::testing::Test { size{53, 11}, engine(42) { - init_executor(ref, exec, comm); + init_executor(gko::ReferenceExecutor::create(), exec, comm); - logger = gko::share(HostToDeviceLogger::create(exec)); + logger = gko::share(HostToDeviceLogger::create(ref)); exec->add_logger(logger); dense_x = dense_type::create(exec); @@ -420,11 +418,7 @@ class VectorReductions : public ::testing::Test { y = gko::clone(exec, tmp_y); } - void SetUp() override - { - ASSERT_GT(comm.size(), 0); - init_executor(gko::ReferenceExecutor::create(), exec); - } + void SetUp() override { ASSERT_GT(comm.size(), 0); } void TearDown() override { @@ -597,11 +591,7 @@ class VectorLocalOps : public ::testing::Test { complex = complex_dist_vec_type::create(exec, comm); } - void SetUp() override - { - ASSERT_GT(comm.size(), 0); - init_executor(gko::ReferenceExecutor::create(), exec); - } + void SetUp() override { ASSERT_GT(comm.size(), 0); } void TearDown() override {