Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
* remove unused variables
* add documentation to highest_precision helper

Co-authored-by: Terry Cojean <terry.cojean@kit.edu>
  • Loading branch information
upsj and tcojean committed Oct 1, 2021
1 parent 9bd1f32 commit c3f5fa3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
11 changes: 11 additions & 0 deletions include/ginkgo/core/base/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,17 @@ template <typename T>
using increase_precision = typename detail::increase_precision_impl<T>::type;


/**
* Obtains the smallest arithmetic type that is able to store elements of all
* template parameter types exactly. The template type parameters need to be
* either all real or all complex types; mixing real and complex types is not
* possible.
*
* Formally, it computes a right-fold over the type list, with the highest
* precision of a pair of real arithmetic types T1, T2 computed as
* `decltype(T1{} + T2{})`, or
* `std::complex<highest_precision<remove_complex<T1>, remove_complex<T2>>>` for
* a pair of complex types.
*
* @tparam Ts  the types for which the common highest-precision type is
*             computed
*/
template <typename... Ts>
using highest_precision =
typename detail::highest_precision_variadic<Ts...>::type;
Expand Down
12 changes: 3 additions & 9 deletions omp/matrix/coo_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ void spmv2_blocked(std::shared_ptr<const OmpExecutor> exec,
for (auto local_nz = nz;
local_nz < end && coo_row[local_nz] == first;
local_nz++) {
const auto row = first;
const auto col = coo_col[local_nz];
#pragma unroll
for (size_type i = 0; i < block_size; i++) {
Expand Down Expand Up @@ -190,7 +189,6 @@ void spmv2_blocked(std::shared_ptr<const OmpExecutor> exec,
// sums
partial_sum.fill(zero<ValueType>());
for (auto local_nz = nz; local_nz < end; local_nz++) {
const auto row = last;
const auto col = coo_col[local_nz];
#pragma unroll
for (size_type i = 0; i < block_size; i++) {
Expand All @@ -205,13 +203,12 @@ void spmv2_blocked(std::shared_ptr<const OmpExecutor> exec,
for (size_type i = 0; i < block_size; i++) {
const auto rhs = i + rhs_base;
const auto row = last;
atomic_add(c->at(last, rhs), partial_sum[i]);
atomic_add(c->at(row, rhs), partial_sum[i]);
}
}
// handle row overlap with following thread: block partial sums
partial_sum.fill(zero<ValueType>());
for (; nz < end; nz++) {
const auto row = last;
const auto col = coo_col[nz];
for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) {
partial_sum[rhs - rounded_rhs] +=
Expand All @@ -221,8 +218,7 @@ void spmv2_blocked(std::shared_ptr<const OmpExecutor> exec,
// handle row overlap with following thread: block add to memory
for (size_type rhs = rounded_rhs; rhs < num_rhs; rhs++) {
const auto row = last;
atomic_add(c->at(last, rhs),
partial_sum[rhs - rounded_rhs]);
atomic_add(c->at(row, rhs), partial_sum[rhs - rounded_rhs]);
}
}
}
Expand Down Expand Up @@ -260,7 +256,6 @@ void spmv2_small_rhs(std::shared_ptr<const OmpExecutor> exec,
// handle row overlap with previous thread: partial sums
partial_sum.fill(zero<ValueType>());
for (; nz < end && coo_row[nz] == first; nz++) {
const auto row = first;
const auto col = coo_col[nz];
#pragma unroll
for (size_type rhs = 0; rhs < num_rhs; rhs++) {
Expand All @@ -287,7 +282,6 @@ void spmv2_small_rhs(std::shared_ptr<const OmpExecutor> exec,
// handle row overlap with following thread: partial sums
partial_sum.fill(zero<ValueType>());
for (; nz < end; nz++) {
const auto row = last;
const auto col = coo_col[nz];
#pragma unroll
for (size_type rhs = 0; rhs < num_rhs; rhs++) {
Expand All @@ -299,7 +293,7 @@ void spmv2_small_rhs(std::shared_ptr<const OmpExecutor> exec,
#pragma unroll
for (size_type rhs = 0; rhs < num_rhs; rhs++) {
const auto row = last;
atomic_add(c->at(last, rhs), partial_sum[rhs]);
atomic_add(c->at(row, rhs), partial_sum[rhs]);
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions omp/matrix/sellp_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ void spmv_small_rhs(std::shared_ptr<const OmpExecutor> exec,
matrix::Dense<ValueType>* c, OutFn out)
{
GKO_ASSERT(b->get_size()[1] == num_rhs);
auto col_idxs = a->get_const_col_idxs();
auto slice_lengths = a->get_const_slice_lengths();
auto slice_sets = a->get_const_slice_sets();
auto slice_size = a->get_slice_size();
Expand Down Expand Up @@ -100,7 +99,6 @@ void spmv_blocked(std::shared_ptr<const OmpExecutor> exec,
const matrix::Dense<ValueType>* b,
matrix::Dense<ValueType>* c, OutFn out)
{
auto col_idxs = a->get_const_col_idxs();
auto slice_lengths = a->get_const_slice_lengths();
auto slice_sets = a->get_const_slice_sets();
auto slice_size = a->get_slice_size();
Expand Down

0 comments on commit c3f5fa3

Please sign in to comment.