Skip to content

Commit

Permalink
Initial OpenACC parallel_reduce implementation for Team policy (#5610)
Browse files Browse the repository at this point in the history
* Initial OpenACC parallel_reduce implementation for Team policy

* Clang-format

* Apply suggestions from code review

Apply suggestions from code review (@masterleinad and @dalg24)

Co-authored-by: Daniel Arndt <arndtd@ornl.gov>
Co-authored-by: Damien L-G <dalg24+github@gmail.com>

* Minor modification as suggested by code review.

* Change `always_false` to `always_true`.

* Minor fix on parallel_reduce() implementation.

* Apply suggestions from code review

Co-authored-by: Damien L-G <dalg24+github@gmail.com>

* Revert `std::enable_if_t<Kokkos::is_view_v<ViewType>::value>* = nullptr)` back to `std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr)`
(No definition of is_view_v<> found).

* Change `const FunctorType a_functor(m_functor);` to `auto const a_functor = m_functor;`

* Rebase this branch and change is_view<> to is_view_v<>

* Updated the copyright and rebased this branch.

* Add a comment to the parallel_reduce() implementation

* Comment out the hierarchical reduction implementations not used for now.

* Re-factored the parallel_reduce construct with Team policy to support
different reduction types.

* Add a missing acc routine directive.

* Re-factor parallel_reduce(team policy) constructs.

* Set KOKKOS_OPENACC_FEATURE_LEVEL to 14 in core/unit_test/CMakeLists.txt.

* Fix minor bugs in the OpenACC parallel_reduce(Team Policy).
Versions with the KOKKOS_ENABLE_OPENACC_COLLAPSE_HIERARCHICAL_CONSTRUCTS
macro enabled and disabled should have different league loop
implementations.

* Change "auto const a_functor = m_functor;" back to "auto const a_functor(m_functor);"

---------

Co-authored-by: Daniel Arndt <arndtd@ornl.gov>
Co-authored-by: Damien L-G <dalg24+github@gmail.com>
  • Loading branch information
3 people committed Jan 31, 2023
1 parent 59067d4 commit 0130a3f
Show file tree
Hide file tree
Showing 7 changed files with 453 additions and 15 deletions.
1 change: 0 additions & 1 deletion core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#ifndef KOKKOS_OPENACC_PARALLEL_FOR_TEAM_HPP
#define KOKKOS_OPENACC_PARALLEL_FOR_TEAM_HPP

#include <openacc.h>
#include <OpenACC/Kokkos_OpenACC_Team.hpp>
#include <OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp>

Expand Down
428 changes: 428 additions & 0 deletions core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp

Large diffs are not rendered by default.

28 changes: 15 additions & 13 deletions core/src/OpenACC/Kokkos_OpenACC_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class OpenACCTeamMember {
// FIXME_OPENACC: team_broadcast() is not implemented.
template <class ValueType>
KOKKOS_FUNCTION void team_broadcast(ValueType& value, int thread_id) const {
static_assert(Kokkos::Impl::always_false<ValueType>::value,
static_assert(!Kokkos::Impl::always_true<ValueType>::value,
"Kokkos Error: team_broadcast() is not implemented for the "
"OpenACC backend");
return ValueType();
Expand All @@ -99,7 +99,7 @@ class OpenACCTeamMember {
template <class ValueType, class JoinOp>
KOKKOS_FUNCTION ValueType team_reduce(const ValueType& value,
const JoinOp& op_in) const {
static_assert(Kokkos::Impl::always_false<ValueType>::value,
static_assert(!Kokkos::Impl::always_true<ValueType>::value,
"Kokkos Error: team_reduce() is not implemented for the "
"OpenACC backend");
return ValueType();
Expand All @@ -110,7 +110,7 @@ class OpenACCTeamMember {
KOKKOS_FUNCTION ArgType team_scan(const ArgType& /*value*/,
ArgType* const /*global_accum*/) const {
static_assert(
Kokkos::Impl::always_false<ArgType>::value,
!Kokkos::Impl::always_true<ArgType>::value,
"Kokkos Error: team_scan() is not implemented for the OpenACC backend");
return ArgType();
}
Expand Down Expand Up @@ -163,37 +163,37 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenACC, Properties...>
// implementations.
template <class FunctorType>
static int team_size_max(const FunctorType&, const ParallelForTag&) {
return DEFAULT_TEAM_SIZE_MAX;
return default_team_size_max;
}

template <class FunctorType>
static int team_size_max(const FunctorType&, const ParallelReduceTag&) {
return DEFAULT_TEAM_SIZE_MAX;
return default_team_size_max;
}

template <class FunctorType, class ReducerType>
static int team_size_max(const FunctorType&, const ReducerType&,
const ParallelReduceTag&) {
return DEFAULT_TEAM_SIZE_MAX;
return default_team_size_max;
}

// FIXME_OPENACC: update team_size_recommended() APIs with realistic
// implementations.
template <class FunctorType>
static int team_size_recommended(const FunctorType&, const ParallelForTag&) {
return DEFAULT_TEAM_SIZE_REC;
return default_team_size;
}

template <class FunctorType>
static int team_size_recommended(const FunctorType&,
const ParallelReduceTag&) {
return DEFAULT_TEAM_SIZE_REC;
return default_team_size;
}

template <class FunctorType, class ReducerType>
static int team_size_recommended(const FunctorType&, const ReducerType&,
const ParallelReduceTag&) {
return DEFAULT_TEAM_SIZE_REC;
return default_team_size;
}

//----------------------------------------
Expand All @@ -208,7 +208,9 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenACC, Properties...>
std::array<size_t, 2> m_thread_scratch_size;
bool m_tune_team_size;
bool m_tune_vector_length;
constexpr static const size_t default_team_size =
constexpr static int default_team_size_max =
OpenACCTeamMember::DEFAULT_TEAM_SIZE_MAX;
constexpr static int default_team_size =
OpenACCTeamMember::DEFAULT_TEAM_SIZE_REC;
int m_chunk_size;

Expand All @@ -226,8 +228,8 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenACC, Properties...>
public:
bool impl_auto_team_size() const { return m_tune_team_size; }
bool impl_auto_vector_length() const { return m_tune_vector_length; }
void impl_set_team_size(const size_t size) { m_team_size = size; }
void impl_set_vector_length(const size_t length) {
void impl_set_team_size(const int size) { m_team_size = size; }
void impl_set_vector_length(const int length) {
m_tune_vector_length = length;
}
int impl_vector_length() const { return m_vector_length; }
Expand Down Expand Up @@ -348,7 +350,7 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenACC, Properties...>
m_chunk_size(0) {
init(league_size_request, team_size_request, 1);
}
static size_t vector_length_max() {
static int vector_length_max() {
return 32; /* TODO: this is bad. Need logic that is compiler and backend
aware */
}
Expand Down
1 change: 1 addition & 0 deletions core/src/decl/Kokkos_Declare_OPENACC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <OpenACC/Kokkos_OpenACC_ParallelFor_MDRange.hpp>
#include <OpenACC/Kokkos_OpenACC_ParallelReduce_MDRange.hpp>
#include <OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp>
#include <OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp>
#endif

#endif
2 changes: 1 addition & 1 deletion core/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ SET(KOKKOS_SYCL_FEATURE_LEVEL 999)
SET(KOKKOS_SYCL_NAME Experimental::SYCL)
SET(KOKKOS_THREADS_FEATURE_LEVEL 999)
SET(KOKKOS_THREADS_NAME Threads)
SET(KOKKOS_OPENACC_FEATURE_LEVEL 11)
SET(KOKKOS_OPENACC_FEATURE_LEVEL 14)
SET(KOKKOS_OPENACC_NAME Experimental::OpenACC)


Expand Down
4 changes: 4 additions & 0 deletions core/unit_test/incremental/Test12a_ThreadScratch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ struct ThreadScratch {

TEST(TEST_CATEGORY, IncrTest_12a_ThreadScratch) {
ThreadScratch<TEST_EXECSPACE> test;
#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC
GTEST_SKIP() << "skipping since scratch memory is not yet implemented in the "
"OpenACC backend";
#endif
// FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to
// pass in the Release and RelWithDebInfo builds. Does not need the team_size
// to be a multiple of 32 for the Debug builds.
Expand Down
4 changes: 4 additions & 0 deletions core/unit_test/incremental/Test12b_TeamScratch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ struct TeamScratch {

TEST(TEST_CATEGORY, IncrTest_12b_TeamScratch) {
TeamScratch<TEST_EXECSPACE> test;
#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC
GTEST_SKIP() << "skipping since scratch memory is not yet implemented in the "
"OpenACC backend";
#endif
// FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to
// pass in the Release and RelWithDebInfo builds. Does not need the team_size
// to be a multiple of 32 for the Debug builds.
Expand Down

0 comments on commit 0130a3f

Please sign in to comment.