Skip to content

Commit

Permalink
Merge pull request #703 from ginkgo-project/omp_prefix_sum
Browse files Browse the repository at this point in the history
Simple parallel algorithm for exclusive prefix sum for the OpenMP backend

Merge PR #703
  • Loading branch information
Slaedr committed Feb 24, 2021
2 parents 14da01e + 89914a7 commit 72d8980
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 7 deletions.
16 changes: 16 additions & 0 deletions core/components/prefix_sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,22 @@ namespace gko {
namespace kernels {


/**
* \fn prefix_sum
* Computes an exclusive prefix sum or exclusive scan of the input array.
*
* As with the standard definition of exclusive scan, the last entry of the
* input array is not read at all, but is written to.
* If the input is [3,4,1,9,100], it will be replaced by
* [0,3,7,8,17].
*
* \tparam IndexType Type of entries to be scanned (summed).
*
* \param exec Executor on which to run the scan operation
* \param counts The input/output array to be scanned with the sum operation
* \param num_entries Size of the array, equal to one more than the number
* of entries to be summed.
*/
#define GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) \
void prefix_sum(std::shared_ptr<const DefaultExecutor> exec, \
IndexType *counts, size_type num_entries)
Expand Down
65 changes: 58 additions & 7 deletions omp/components/prefix_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,72 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/components/prefix_sum.hpp"


#include <algorithm>


#include <omp.h>


#include "core/base/allocator.hpp"


namespace gko {
namespace kernels {
namespace omp {
namespace components {


/*
* The last entry of the input array is never used, but is replaced.
*/
template <typename IndexType>
void prefix_sum(std::shared_ptr<const OmpExecutor> exec, IndexType *counts,
size_type num_entries)
void prefix_sum(std::shared_ptr<const OmpExecutor> exec,
IndexType *const counts, const size_type num_entries)
{
IndexType partial_sum{};
for (size_type i = 0; i < num_entries; ++i) {
auto nnz = counts[i];
counts[i] = partial_sum;
partial_sum += nnz;
// the operation only makes sense for arrays of size at least 2
if (num_entries < 2) {
if (num_entries == 0) {
return;
} else {
counts[0] = 0;
return;
}
}

const int nthreads = omp_get_max_threads();
vector<IndexType> proc_sums(nthreads, 0, {exec});
const size_type def_num_witems = (num_entries - 1) / nthreads + 1;

#pragma omp parallel
{
const int thread_id = omp_get_thread_num();
const size_type startidx = thread_id * def_num_witems;
const size_type endidx =
std::min(num_entries, (thread_id + 1) * def_num_witems);

IndexType partial_sum{0};
for (size_type i = startidx; i < endidx; ++i) {
auto nnz = counts[i];
counts[i] = partial_sum;
partial_sum += nnz;
}

proc_sums[thread_id] = partial_sum;

#pragma omp barrier

#pragma omp single
{
for (int i = 0; i < nthreads - 1; i++) {
proc_sums[i + 1] += proc_sums[i];
}
}

if (thread_id > 0) {
for (size_type i = startidx; i < endidx; i++) {
counts[i] += proc_sums[thread_id - 1];
}
}
}
}

Expand Down
7 changes: 7 additions & 0 deletions omp/test/components/prefix_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ class PrefixSum : public ::testing::Test {
TYPED_TEST_SUITE(PrefixSum, gko::test::IndexTypes);


TYPED_TEST(PrefixSum, TrivialCasesEqualReference)
{
this->test(0);
this->test(1);
}


TYPED_TEST(PrefixSum, SmallEqualsReference) { this->test(100); }


Expand Down

0 comments on commit 72d8980

Please sign in to comment.