From c60fbd060ed8191ba7b01162b4312668ed7223c0 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 24 May 2024 15:32:27 +0000 Subject: [PATCH] [dist-mat] store index map this also changes one constructor --- core/distributed/matrix.cpp | 181 ++++++++++----------- include/ginkgo/core/distributed/matrix.hpp | 17 +- test/mpi/matrix.cpp | 43 ++--- 3 files changed, 118 insertions(+), 123 deletions(-) diff --git a/core/distributed/matrix.cpp b/core/distributed/matrix.cpp index 63f359cc40a..7805ad800c5 100644 --- a/core/distributed/matrix.cpp +++ b/core/distributed/matrix.cpp @@ -28,6 +28,62 @@ GKO_REGISTER_OPERATION(separate_local_nonlocal, } // namespace matrix +template +void initialize_communication_pattern( + std::shared_ptr exec, mpi::communicator comm, + const index_map& imap, + std::vector& recv_sizes, + std::vector& recv_offsets, + std::vector& send_sizes, + std::vector& send_offsets, + array& gather_idxs) +{ + // exchange step 1: determine recv_sizes, send_sizes, send_offsets + auto host_recv_targets = + make_temporary_clone(exec->get_master(), &imap.get_remote_target_ids()); + auto host_offsets = make_temporary_clone( + exec->get_master(), &imap.get_remote_global_idxs().get_offsets()); + auto compute_recv_sizes = [](const auto* recv_targets, size_type size, + const auto* offsets, auto& recv_sizes) { + for (size_type i = 0; i < size; ++i) { + recv_sizes[recv_targets[i]] = offsets[i + 1] - offsets[i]; + } + }; + std::fill(recv_sizes.begin(), recv_sizes.end(), 0); + compute_recv_sizes(host_recv_targets->get_const_data(), + host_recv_targets->get_size(), + host_offsets->get_const_data(), recv_sizes); + std::partial_sum(recv_sizes.begin(), recv_sizes.end(), + recv_offsets.begin() + 1); + comm.all_to_all(exec, recv_sizes.data(), 1, send_sizes.data(), 1); + std::partial_sum(send_sizes.begin(), send_sizes.end(), + send_offsets.begin() + 1); + send_offsets[0] = 0; + recv_offsets[0] = 0; + + // exchange step 2: exchange gather_idxs from receivers to senders + auto recv_gather_idxs = + make_const_array_view( + imap.get_executor(), imap.get_non_local_size(), + imap.get_remote_local_idxs().get_const_flat_data()) + .copy_to_array(); + auto use_host_buffer = mpi::requires_host_buffer(exec, comm); + if (use_host_buffer) { + recv_gather_idxs.set_executor(exec->get_master()); + gather_idxs.clear(); + gather_idxs.set_executor(exec->get_master()); + } + gather_idxs.resize_and_reset(send_offsets.back()); + comm.all_to_all_v(use_host_buffer ? exec->get_master() : exec, + recv_gather_idxs.get_const_data(), recv_sizes.data(), + recv_offsets.data(), gather_idxs.get_data(), + send_sizes.data(), send_offsets.data()); + if (use_host_buffer) { + gather_idxs.set_executor(exec); + } +} + + template Matrix::Matrix( std::shared_ptr exec, mpi::communicator comm) @@ -45,6 +101,7 @@ Matrix::Matrix( : EnableDistributedLinOp< Matrix>{exec}, DistributedBase{comm}, + imap_(exec), send_offsets_(comm.size() + 1), send_sizes_(comm.size()), recv_offsets_(comm.size() + 1), @@ -72,6 +129,7 @@ Matrix::Matrix( : EnableDistributedLinOp< Matrix>{exec}, DistributedBase{comm}, + imap_(exec), send_offsets_(comm.size() + 1), send_sizes_(comm.size()), recv_offsets_(comm.size() + 1), @@ -90,14 +148,13 @@ Matrix::Matrix( template Matrix::Matrix( - std::shared_ptr exec, mpi::communicator comm, dim<2> size, - std::shared_ptr local_linop, std::shared_ptr non_local_linop, - std::vector recv_sizes, - std::vector recv_offsets, - array recv_gather_idxs) + std::shared_ptr exec, mpi::communicator comm, + index_map imap, + std::shared_ptr local_linop, std::shared_ptr non_local_linop) : EnableDistributedLinOp< Matrix>{exec}, DistributedBase{comm}, + imap_(std::move(imap)), send_offsets_(comm.size() + 1), send_sizes_(comm.size()), recv_offsets_(comm.size() + 1), @@ -106,39 +163,15 @@ Matrix::Matrix( non_local_to_global_{exec}, one_scalar_{} { - this->set_size(size); + this->set_size({imap_.get_global_size(), imap_.get_global_size()}); local_mtx_ = std::move(local_linop); non_local_mtx_ = std::move(non_local_linop); - recv_offsets_ = std::move(recv_offsets); - recv_sizes_ = std::move(recv_sizes); - // build send information from recv copy - // exchange step 1: determine recv_sizes, send_sizes, send_offsets - std::partial_sum(recv_sizes_.begin(), recv_sizes_.end(), - recv_offsets_.begin() + 1); - comm.all_to_all(exec, recv_sizes_.data(), 1, send_sizes_.data(), 1); - std::partial_sum(send_sizes_.begin(), send_sizes_.end(), - send_offsets_.begin() + 1); - send_offsets_[0] = 0; - recv_offsets_[0] = 0; - - // exchange step 2: exchange gather_idxs from receivers to senders - auto use_host_buffer = mpi::requires_host_buffer(exec, comm); - if (use_host_buffer) { - recv_gather_idxs.set_executor(exec->get_master()); - gather_idxs_.clear(); - gather_idxs_.set_executor(exec->get_master()); - } - gather_idxs_.resize_and_reset(send_offsets_.back()); - comm.all_to_all_v(use_host_buffer ? exec->get_master() : exec, - recv_gather_idxs.get_const_data(), recv_sizes_.data(), - recv_offsets_.data(), gather_idxs_.get_data(), - send_sizes_.data(), send_offsets_.data()); - if (use_host_buffer) { - gather_idxs_.set_executor(exec); - } - one_scalar_.init(exec, dim<2>{1, 1}); one_scalar_->fill(one()); + + initialize_communication_pattern( + this->get_executor(), this->get_communicator(), imap_, recv_sizes_, + recv_offsets_, send_sizes_, send_offsets_, gather_idxs_); } @@ -186,15 +219,13 @@ Matrix::create( template std::unique_ptr> Matrix::create( - std::shared_ptr exec, mpi::communicator comm, dim<2> size, - std::shared_ptr local_linop, std::shared_ptr non_local_linop, - std::vector recv_sizes, - std::vector recv_offsets, - array recv_gather_idxs) + std::shared_ptr exec, mpi::communicator comm, + index_map imap, + std::shared_ptr local_linop, std::shared_ptr non_local_linop) { - return std::unique_ptr{new Matrix{exec, comm, size, local_linop, - non_local_linop, recv_sizes, - recv_offsets, recv_gather_idxs}}; + return std::unique_ptr{ + new Matrix{std::move(exec), comm, std::move(imap), + std::move(local_linop), std::move(non_local_linop)}}; } @@ -207,6 +238,7 @@ void Matrix::convert_to( result->get_communicator().size()); result->local_mtx_->copy_from(this->local_mtx_); result->non_local_mtx_->copy_from(this->non_local_mtx_); + result->imap_ = this->imap_; result->gather_idxs_ = this->gather_idxs_; result->send_offsets_ = this->send_offsets_; result->recv_offsets_ = this->recv_offsets_; @@ -226,6 +258,7 @@ void Matrix::move_to( result->get_communicator().size()); result->local_mtx_->move_from(this->local_mtx_); result->non_local_mtx_->move_from(this->non_local_mtx_); + result->imap_ = std::move(this->imap_); result->gather_idxs_ = std::move(this->gather_idxs_); result->send_offsets_ = std::move(this->send_offsets_); result->recv_offsets_ = std::move(this->recv_offsets_); @@ -278,15 +311,15 @@ void Matrix::read_distributed( local_row_idxs, local_col_idxs, local_values, non_local_row_idxs, global_non_local_col_idxs, non_local_values)); - auto imap = index_map( + imap_ = index_map( exec, col_partition, comm.rank(), global_non_local_col_idxs); auto non_local_col_idxs = - imap.map_to_local(global_non_local_col_idxs, index_space::non_local); + imap_.map_to_local(global_non_local_col_idxs, index_space::non_local); non_local_to_global_ = make_const_array_view( - imap.get_executor(), imap.get_remote_global_idxs().get_size(), - imap.get_remote_global_idxs().get_const_flat_data()) + imap_.get_executor(), imap_.get_remote_global_idxs().get_size(), + imap_.get_remote_global_idxs().get_const_flat_data()) .copy_to_array(); // read the local matrix data @@ -298,7 +331,7 @@ void Matrix::read_distributed( exec, dim<2>{num_local_rows, num_local_cols}, std::move(local_row_idxs), std::move(local_col_idxs), std::move(local_values)}; device_matrix_data non_local_data{ - exec, dim<2>{num_local_rows, imap.get_remote_global_idxs().get_size()}, + exec, dim<2>{num_local_rows, imap_.get_remote_global_idxs().get_size()}, std::move(non_local_row_idxs), std::move(non_local_col_idxs), std::move(non_local_values)}; as>(this->local_mtx_) @@ -306,49 +339,9 @@ void Matrix::read_distributed( as>(this->non_local_mtx_) ->read(std::move(non_local_data)); - // exchange step 1: determine recv_sizes, send_sizes, send_offsets - auto host_recv_targets = - make_temporary_clone(exec->get_master(), &imap.get_remote_target_ids()); - auto host_offsets = make_temporary_clone( - exec->get_master(), &imap.get_remote_global_idxs().get_offsets()); - auto compute_recv_sizes = [](const auto* recv_targets, size_type size, - const auto* offsets, auto& recv_sizes) { - for (size_type i = 0; i < size; ++i) { - recv_sizes[recv_targets[i]] = offsets[i + 1] - offsets[i]; - } - }; - std::fill(recv_sizes_.begin(), recv_sizes_.end(), 0); - compute_recv_sizes(host_recv_targets->get_const_data(), - host_recv_targets->get_size(), - host_offsets->get_const_data(), recv_sizes_); - std::partial_sum(recv_sizes_.begin(), recv_sizes_.end(), - recv_offsets_.begin() + 1); - comm.all_to_all(exec, recv_sizes_.data(), 1, send_sizes_.data(), 1); - std::partial_sum(send_sizes_.begin(), send_sizes_.end(), - send_offsets_.begin() + 1); - send_offsets_[0] = 0; - recv_offsets_[0] = 0; - - // exchange step 2: exchange gather_idxs from receivers to senders - auto recv_gather_idxs = - make_const_array_view( - imap.get_executor(), imap.get_non_local_size(), - imap.get_remote_local_idxs().get_const_flat_data()) - .copy_to_array(); - auto use_host_buffer = mpi::requires_host_buffer(exec, comm); - if (use_host_buffer) { - recv_gather_idxs.set_executor(exec->get_master()); - gather_idxs_.clear(); - gather_idxs_.set_executor(exec->get_master()); - } - gather_idxs_.resize_and_reset(send_offsets_.back()); - comm.all_to_all_v(use_host_buffer ? exec->get_master() : exec, - recv_gather_idxs.get_const_data(), recv_sizes_.data(), - recv_offsets_.data(), gather_idxs_.get_data(), - send_sizes_.data(), send_offsets_.data()); - if (use_host_buffer) { - gather_idxs_.set_executor(exec); - } + initialize_communication_pattern(exec, comm, imap_, recv_sizes_, + recv_offsets_, send_sizes_, send_offsets_, + gather_idxs_); } @@ -579,7 +572,8 @@ template Matrix::Matrix(const Matrix& other) : EnableDistributedLinOp>{other.get_executor()}, - DistributedBase{other.get_communicator()} + DistributedBase{other.get_communicator()}, + imap_(other.get_executor()) { *this = other; } @@ -590,7 +584,8 @@ Matrix::Matrix( Matrix&& other) noexcept : EnableDistributedLinOp>{other.get_executor()}, - DistributedBase{other.get_communicator()} + DistributedBase{other.get_communicator()}, + imap_(other.get_executor()) { *this = std::move(other); } @@ -607,6 +602,7 @@ Matrix::operator=( this->set_size(other.get_size()); local_mtx_->copy_from(other.local_mtx_); non_local_mtx_->copy_from(other.non_local_mtx_); + imap_ = other.imap_; gather_idxs_ = other.gather_idxs_; send_offsets_ = other.send_offsets_; recv_offsets_ = other.recv_offsets_; @@ -631,6 +627,7 @@ Matrix::operator=(Matrix&& other) other.set_size({}); local_mtx_->move_from(other.local_mtx_); non_local_mtx_->move_from(other.non_local_mtx_); + imap_ = std::move(other.imap_); gather_idxs_ = std::move(other.gather_idxs_); send_offsets_ = std::move(other.send_offsets_); recv_offsets_ = std::move(other.recv_offsets_); diff --git a/include/ginkgo/core/distributed/matrix.hpp b/include/ginkgo/core/distributed/matrix.hpp index 1e5e33581a9..79cf73d5ad7 100644 --- a/include/ginkgo/core/distributed/matrix.hpp +++ b/include/ginkgo/core/distributed/matrix.hpp @@ -569,11 +569,9 @@ class Matrix */ static std::unique_ptr create( std::shared_ptr exec, mpi::communicator comm, - dim<2> size, std::shared_ptr local_linop, - std::shared_ptr non_local_linop, - std::vector recv_sizes, - std::vector recv_offsets, - array recv_gather_idxs); + index_map imap, + std::shared_ptr local_linop, + std::shared_ptr non_local_linop); /** * Scales the columns of the matrix by the respective entries of the vector. @@ -607,12 +605,10 @@ class Matrix std::shared_ptr local_linop); explicit Matrix(std::shared_ptr exec, - mpi::communicator comm, dim<2> size, + mpi::communicator comm, + index_map imap, std::shared_ptr local_linop, - std::shared_ptr non_local_linop, - std::vector recv_sizes, - std::vector recv_offsets, - array recv_gather_idxs); + std::shared_ptr non_local_linop); /** * Starts a non-blocking communication of the values of b that are shared @@ -630,6 +626,7 @@ class Matrix LinOp* x) const override; private: + index_map imap_; std::vector send_offsets_; std::vector send_sizes_; std::vector recv_offsets_; diff --git a/test/mpi/matrix.cpp b/test/mpi/matrix.cpp index 1c090b6c43f..010257b526b 100644 --- a/test/mpi/matrix.cpp +++ b/test/mpi/matrix.cpp @@ -204,14 +204,29 @@ TYPED_TEST(MatrixCreation, BuildFromExistingData) using value_type = typename TestFixture::value_type; using csr = typename TestFixture::local_matrix_type; using global_index_type = typename TestFixture::global_index_type; - using Partition = typename TestFixture::Partition; using local_index_type = typename TestFixture::local_index_type; + using Partition = typename TestFixture::Partition; + using index_map_type = + gko::experimental::distributed::index_map; using matrix_data = gko::matrix_data; using dist_mtx_type = typename TestFixture::dist_mtx_type; using dist_vec_type = gko::experimental::distributed::Vector; using comm_index_type = gko::experimental::distributed::comm_index_type; auto rank = this->comm.rank(); - I> res_local[] = {{{2, 0}, {0, 0}}, {{0, 5}, {0, 0}}, {{0}}}; + auto row_part = gko::share(Partition::build_from_contiguous( + this->exec, gko::array( + this->exec, I{0, 2, 4, 5}))); + auto col_part = gko::share(Partition::build_from_mapping( + this->exec, + gko::array(this->exec, + I{1, 1, 2, 0, 0}), + 3)); + std::array, 3> recv_connections = { + gko::array{this->exec, {1, 2}}, + gko::array{this->exec, {3, 4, 2}}, + gko::array{this->exec, {4, 0}}}; + index_map_type imap(this->exec, col_part, rank, recv_connections[rank]); std::array, 3> size_local{{{2, 2}, {2, 2}, {1, 1}}}; std::array dist_input_local{ {{size_local[0], {{0, 0, 2}}}, @@ -219,19 +234,14 @@ TYPED_TEST(MatrixCreation, BuildFromExistingData) {size_local[2], std::initializer_list< gko::detail::input_triple>{}}}}; - I> res_non_local[] = { - {{1, 0}, {3, 4}}, {{0, 0, 6}, {8, 7, 0}}, {{10, 9}}}; std::array, 3> size_non_local{{{2, 2}, {2, 3}, {1, 2}}}; std::array dist_input_non_local{ {{size_non_local[0], {{0, 0, 1}, {1, 0, 3}, {1, 1, 4}}}, {size_non_local[1], {{0, 2, 6}, {1, 0, 8}, {1, 1, 7}}}, {size_non_local[2], {{0, 0, 10}, {0, 1, 9}}}}}; - std::array, 3> recv_sizes{ - {{0, 1, 1}, {2, 0, 1}, {1, 1, 0}}}; - std::array, 3> recv_offsets{ - {{0, 0, 1, 2}, {0, 2, 2, 3}, {0, 1, 2, 2}}}; - std::array, 3> recv_gather_index{ - {{this->exec, {1, 0}}, {this->exec, {0, 1, 0}}, {this->exec, {1, 0}}}}; + I> res_local[] = {{{2, 0}, {0, 0}}, {{0, 5}, {0, 0}}, {{0}}}; + I> res_non_local[] = { + {{1, 0}, {3, 4}}, {{0, 0, 6}, {8, 7, 0}}, {{10, 9}}}; auto local = gko::share(csr::create(this->exec)); local->read(dist_input_local[rank]); auto non_local = gko::share(csr::create(this->exec)); @@ -240,22 +250,13 @@ TYPED_TEST(MatrixCreation, BuildFromExistingData) auto vec_md = gko::matrix_data{ I>{{1}, {2}, {3}, {4}, {5}}}; I> result[3] = {{{10}, {18}}, {{28}, {67}}, {{59}}}; - auto row_part = Partition::build_from_contiguous( - this->exec, gko::array( - this->exec, I{0, 2, 4, 5})); - auto col_part = Partition::build_from_mapping( - this->exec, - gko::array(this->exec, - I{1, 1, 2, 0, 0}), - 3); auto x = dist_vec_type::create(this->ref, this->comm); auto y = dist_vec_type::create(this->ref, this->comm); x->read_distributed(vec_md, col_part); y->read_distributed(vec_md, row_part); - auto mat = dist_mtx_type::create( - this->exec, this->comm, gko::dim<2>{5, 5}, local, non_local, - recv_sizes[rank], recv_offsets[rank], recv_gather_index[rank]); + auto mat = + dist_mtx_type::create(this->exec, this->comm, imap, local, non_local); mat->apply(x, y); GKO_ASSERT_MTX_NEAR(gko::as(mat->get_local_matrix()), res_local[rank],