Skip to content

Commit

Permalink
Merge pull request #5820 from crtrott/fix-intel-ice-dev
Browse files Browse the repository at this point in the history
Changes to resolve an ICE (internal compiler error) with the Intel Classic compiler
  • Loading branch information
dalg24 committed Jan 28, 2023
2 parents 60f80e5 + 2b5c31a commit 2e1a559
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 182 deletions.
46 changes: 20 additions & 26 deletions core/src/Kokkos_HPX.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1065,17 +1065,16 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType,
WorkTag, void>;

const FunctorType m_functor;
const MDRangePolicy m_mdr_policy;
const iterate_type m_iter;
const Policy m_policy;

public:
void execute() const { dispatch_execute_task(this, m_mdr_policy.space()); }
void execute() const { dispatch_execute_task(this, m_iter.m_rp.space()); }

inline void execute_task() const {
// See [note 1] for an explanation.
Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
m_mdr_policy.space());
m_iter.m_rp.space());

auto exec = Kokkos::Experimental::HPX::impl_get_executor();

Expand All @@ -1087,9 +1086,8 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,

for_loop(par.on(exec).with(
static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))),
m_policy.begin(), m_policy.end(), [this](const Member i) {
iterate_type(m_mdr_policy, m_functor)(i);
});
m_policy.begin(), m_policy.end(),
[this](const Member i) { iterate_type(i); });

#elif KOKKOS_HPX_IMPLEMENTATION == 1
using hpx::for_loop_strided;
Expand All @@ -1101,15 +1099,14 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
const Member i_end =
(std::min)(i_begin + chunk_size, m_policy.end());
for (Member i = i_begin; i < i_end; ++i) {
iterate_type(m_mdr_policy, m_functor)(i);
m_iter(i);
}
});
#endif
}

inline ParallelFor(const FunctorType &arg_functor, MDRangePolicy arg_policy)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
: m_iter(arg_policy, arg_functor),
m_policy(Policy(0, arg_policy.m_num_tiles).set_chunk_size(1)) {}
template <typename Policy, typename Functor>
static int max_tile_size_product(const Policy &, const Functor &) {
Expand Down Expand Up @@ -1406,8 +1403,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType,
WorkTag, reference_type>;

const FunctorType m_functor;
const MDRangePolicy m_mdr_policy;
const iterate_type m_iter;
const Policy m_policy;
const ReducerType m_reducer;
const pointer_type m_result_ptr;
Expand All @@ -1416,19 +1412,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,

public:
void execute() const {
dispatch_execute_task(this, m_mdr_policy.space(), m_force_synchronous);
dispatch_execute_task(this, m_iter.m_rp.space(), m_force_synchronous);
}

inline void execute_task() const {
// See [note 1] for an explanation.
Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
m_mdr_policy.space());
m_iter.m_rp.space());

const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
const std::size_t value_size =
Analysis::value_size(ReducerConditional::select(m_functor, m_reducer));
const std::size_t value_size = Analysis::value_size(
ReducerConditional::select(m_iter.m_func, m_reducer));

thread_buffer &buffer = m_mdr_policy.space().impl_get_buffer();
thread_buffer &buffer = m_iter.m_rp.space().impl_get_buffer();
buffer.resize(num_worker_threads, value_size);

using hpx::for_loop;
Expand All @@ -1438,7 +1434,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
auto exec = Kokkos::Experimental::HPX::impl_get_executor();

typename Analysis::Reducer final_reducer(
&ReducerConditional::select(m_functor, m_reducer));
&ReducerConditional::select(m_iter.m_func, m_reducer));

#if KOKKOS_HPX_IMPLEMENTATION == 0

Expand All @@ -1454,7 +1450,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
reference_type update = Analysis::Reducer::reference(
reinterpret_cast<pointer_type>(buffer.get(
Kokkos::Experimental::HPX::impl_hardware_thread_id())));
iterate_type(m_mdr_policy, m_functor, update)(i);
m_iter(i, update);
});

#elif KOKKOS_HPX_IMPLEMENTATION == 1
Expand All @@ -1477,7 +1473,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end());

for (Member i = i_begin; i < i_end; ++i) {
iterate_type(m_mdr_policy, m_functor, update)(i);
m_iter(i, update);
}
});
#endif
Expand All @@ -1491,7 +1487,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,

if (m_result_ptr != nullptr) {
const int n = Analysis::value_count(
ReducerConditional::select(m_functor, m_reducer));
ReducerConditional::select(m_iter.m_func, m_reducer));

for (int j = 0; j < n; ++j) {
m_result_ptr[j] = reinterpret_cast<pointer_type>(buffer.get(0))[j];
Expand All @@ -1506,18 +1502,16 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
std::enable_if_t<Kokkos::is_view<ViewType>::value &&
!Kokkos::is_reducer<ReducerType>::value,
void *> = nullptr)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
: m_iter(arg_policy, arg_functor),
m_policy(Policy(0, arg_policy.m_num_tiles).set_chunk_size(1)),
m_reducer(InvalidType()),
m_result_ptr(arg_view.data()),
m_force_synchronous(!arg_view.impl_track().has_record()) {}

inline ParallelReduce(const FunctorType &arg_functor,
MDRangePolicy arg_policy, const ReducerType &reducer)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
: m_iter(arg_policy, arg_functor),
m_policy(Policy(0, arg_policy.m_num_tiles).set_chunk_size(1)),
m_reducer(reducer),
m_result_ptr(reducer.view().data()),
m_force_synchronous(!reducer.view().impl_track().has_record()) {}
Expand Down
Loading

0 comments on commit 2e1a559

Please sign in to comment.