Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
- merge /common/unified/.../partition_kernels.hpp.inc into /common/cuda_hip/.../partition_kernels.hpp.inc
- documentation

Co-authored-by: Yu-Hsiang Tsai <yhmtsai@gmail.com>
  • Loading branch information
MarcelKoch and yhmtsai committed Nov 29, 2021
1 parent 810d86c commit 5bb5436
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 319 deletions.
61 changes: 60 additions & 1 deletion common/cuda_hip/distributed/partition_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/


#include "common/unified/distributed/partition_kernels.hpp.inc"
namespace kernel {


/**
 * Initializes the per-range work arrays from the range description.
 *
 * The kernel is launched via run_kernel with num_ranges as its size
 * (presumably one invocation per index in [0, num_ranges)). For every
 * index r it stores
 *  - range_sizes[r] = range_offsets[r + 1] - range_offsets[r],
 *  - part_ids[r]    = range_parts[r],
 *  - permutation[r] = r,
 * and the invocation with r == 0 additionally writes num_parts to
 * part_ids[num_ranges] as a sentinel behind the last entry.
 *
 * @param exec  executor handed to run_kernel
 * @param num_ranges  launch size and position of the sentinel in part_ids
 * @param num_parts  value stored as the sentinel
 * @param range_offsets  input; entries r and r + 1 are read for each r
 * @param range_parts  input; copied entry by entry into part_ids
 * @param range_sizes  output; receives the offset differences
 * @param part_ids  output; index num_ranges is written as well, so the
 *                  array must provide room for that extra entry
 * @param permutation  output; receives the identity mapping
 */
template <typename LocalIndexType, typename GlobalIndexType>
void setup_sizes_ids_permutation(
    std::shared_ptr<const DefaultExecutor> exec, size_type num_ranges,
    comm_index_type num_parts, const GlobalIndexType* range_offsets,
    const comm_index_type* range_parts, Array<LocalIndexType>& range_sizes,
    Array<comm_index_type>& part_ids, Array<GlobalIndexType>& permutation)
{
    run_kernel(
        exec,
        [] GKO_KERNEL(auto range, auto range_count, auto part_count,
                      auto offsets, auto src_part_ids, auto sizes,
                      auto dst_part_ids, auto perm) {
            // per-range entries
            sizes[range] = offsets[range + 1] - offsets[range];
            dst_part_ids[range] = src_part_ids[range];
            perm[range] = static_cast<GlobalIndexType>(range);
            // a single invocation appends the sentinel behind the last ID
            if (range == 0) {
                dst_part_ids[range_count] = part_count;
            }
        },
        num_ranges, num_ranges, num_parts, range_offsets, range_parts,
        range_sizes.get_data(), part_ids.get_data(), permutation.get_data());
}


/**
 * Derives the part sizes and the per-range starting indices from the
 * grouped work arrays.
 *
 * NOTE(review): judging by the names used in the original kernel, this
 * expects range_sizes to hold per-part inclusive running sums of the range
 * sizes, ordered such that ranges of the same part are adjacent - confirm
 * against the caller.
 *
 * For every position p in [0, num_ranges):
 *  - if part_ids[p] differs from part_ids[p + 1], range_sizes[p] is stored
 *    as the size of part part_ids[p];
 *  - starting_indices[permutation[p]] is set to range_sizes[p - 1] if
 *    position p - 1 belongs to the same part, and to zero otherwise. This
 *    shifts the values by one entry, i.e. it turns the inclusive sums into
 *    exclusive ones.
 *
 * @param exec  executor handed to run_kernel
 * @param num_ranges  launch size of the kernel
 * @param range_sizes  input; read at p and, within a part, at p - 1
 * @param part_ids  input; read at p - 1, p and p + 1, so the entry at
 *                  num_ranges has to exist (the sentinel, which has to be
 *                  num_parts)
 * @param permutation  input; target position in starting_indices for each p
 * @param starting_indices  output; one entry is written per position p
 * @param part_sizes  output; only written for the last position of each
 *                    group of equal part IDs, other entries stay untouched
 */
template <typename LocalIndexType, typename GlobalIndexType>
void compute_part_sizes_and_starting_indices(
    std::shared_ptr<const DefaultExecutor> exec, size_type num_ranges,
    const Array<LocalIndexType>& range_sizes,
    const Array<comm_index_type>& part_ids,
    const Array<GlobalIndexType>& permutation, LocalIndexType* starting_indices,
    LocalIndexType* part_sizes)
{
    run_kernel(
        exec,
        [] GKO_KERNEL(auto pos, auto scanned_sizes, auto sorted_part_ids,
                      auto original_positions, auto offsets_out,
                      auto sizes_out) {
            // the first position has no predecessor
            auto preceding_part = invalid_index<comm_index_type>();
            if (pos > 0) {
                preceding_part = sorted_part_ids[pos - 1];
            }
            auto this_part = sorted_part_ids[pos];
            // for the last position this reads the sentinel entry
            auto following_part = sorted_part_ids[pos + 1];
            // the last position of a group carries the total of its part
            if (this_part != following_part) {
                sizes_out[this_part] = scanned_sizes[pos];
            }
            // take the predecessor's value inside a group, zero at its
            // beginning: inclusive sums shifted by one become exclusive
            auto shifted_sum = LocalIndexType{};
            if (preceding_part == this_part) {
                shifted_sum = scanned_sizes[pos - 1];
            }
            offsets_out[original_positions[pos]] = shifted_sum;
        },
        num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(),
        permutation.get_const_data(), starting_indices, part_sizes);
}


} // namespace kernel


template <typename LocalIndexType, typename GlobalIndexType>
Expand Down
92 changes: 0 additions & 92 deletions common/unified/distributed/partition_kernels.hpp.inc

This file was deleted.

2 changes: 2 additions & 0 deletions dpcpp/distributed/partition_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ namespace dpcpp {
namespace partition {


// TODO: wait until https://github.com/oneapi-src/oneDPL/pull/388 is released
// to implement it similarly to cuda/hip
template <typename LocalIndexType, typename GlobalIndexType>
void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
const GlobalIndexType* range_offsets,
Expand Down
7 changes: 4 additions & 3 deletions include/ginkgo/core/distributed/partition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,10 @@ class Partition
*
* @param exec the Executor on which the partition should be built
* @param ranges the boundaries of the ranges representing each part.
Part i contains the indices [ranges[i], ranges[i + 1]).
Has to contain at least one element.
* Part i contains the indices [ranges[i], ranges[i + 1]).
* Has to contain at least one element.
* The first element has to be 0.
*
* @return a Partition representing the given contiguous partitioning.
*/
static std::unique_ptr<Partition> build_from_contiguous(
Expand Down
Loading

0 comments on commit 5bb5436

Please sign in to comment.