Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dpcpp csr check_diagonal_entries and add_scaled_identity #1436

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion common/cuda_hip/matrix/csr_common.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ __global__ __launch_bounds__(default_block_size) void check_diagonal_entries(
if (tile_grp.thread_rank() == 0) {
*has_all_diags = false;
}
return;
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -826,15 +826,19 @@ __global__ __launch_bounds__(default_block_size) void add_scaled_identity(
auto tile_grp =
group::tiled_partition<warp_size>(group::this_thread_block());
const auto warpid = thread::get_subwarp_id_flat<warp_size, IndexType>();
const auto num_warps = thread::get_subwarp_num_flat<warp_size, IndexType>();
if (warpid < num_rows) {
const auto tid_in_warp = tile_grp.thread_rank();
const IndexType row_start = row_ptrs[warpid];
const IndexType num_nz = row_ptrs[warpid + 1] - row_start;
const auto beta_val = beta[0];
const auto alpha_val = alpha[0];
for (IndexType iz = tid_in_warp; iz < num_nz; iz += warp_size) {
values[iz + row_start] *= beta[0];
if (col_idxs[iz + row_start] == warpid) {
values[iz + row_start] += alpha[0];
if (beta_val != one<ValueType>()) {
values[iz + row_start] *= beta_val;
}
if (col_idxs[iz + row_start] == warpid &&
alpha_val != zero<ValueType>()) {
values[iz + row_start] += alpha_val;
}
}
}
Expand Down
104 changes: 101 additions & 3 deletions dpcpp/matrix/csr_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,76 @@ void extract_diagonal(size_type diag_size, size_type nnz,
GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal);


template <typename IndexType>
void check_diagonal_entries(const IndexType num_min_rows_cols,
const IndexType* const __restrict__ row_ptrs,
const IndexType* const __restrict__ col_idxs,
bool* const __restrict__ has_all_diags,
sycl::nd_item<3> item_ct1)
{
constexpr int subgroup_size = config::warp_size;
auto tile_grp = group::tiled_partition<subgroup_size>(
group::this_thread_block(item_ct1));
const auto row =
thread::get_subwarp_id_flat<subgroup_size, IndexType>(item_ct1);
if (row < num_min_rows_cols) {
const auto tid_in_warp = tile_grp.thread_rank();
const auto row_start = row_ptrs[row];
const auto num_nz = row_ptrs[row + 1] - row_start;
bool row_has_diag_local{false};
for (IndexType iz = tid_in_warp; iz < num_nz; iz += subgroup_size) {
if (col_idxs[iz + row_start] == row) {
row_has_diag_local = true;
break;
}
}
auto row_has_diag = static_cast<bool>(tile_grp.any(row_has_diag_local));
if (!row_has_diag) {
if (tile_grp.thread_rank() == 0) {
*has_all_diags = false;
}
}
}
}

GKO_ENABLE_DEFAULT_HOST(check_diagonal_entries, check_diagonal_entries);


template <typename ValueType, typename IndexType>
void add_scaled_identity(const ValueType* const __restrict__ alpha,
const ValueType* const __restrict__ beta,
const IndexType num_rows,
const IndexType* const __restrict__ row_ptrs,
const IndexType* const __restrict__ col_idxs,
ValueType* const __restrict__ values,
sycl::nd_item<3> item_ct1)
{
constexpr int subgroup_size = config::warp_size;
auto tile_grp = group::tiled_partition<subgroup_size>(
group::this_thread_block(item_ct1));
const auto row =
thread::get_subwarp_id_flat<subgroup_size, IndexType>(item_ct1);
if (row < num_rows) {
const auto tid_in_warp = tile_grp.thread_rank();
const auto row_start = row_ptrs[row];
const auto num_nz = row_ptrs[row + 1] - row_start;
const auto beta_val = beta[0];
const auto alpha_val = alpha[0];
for (IndexType iz = tid_in_warp; iz < num_nz; iz += subgroup_size) {
if (beta_val != one<ValueType>()) {
values[iz + row_start] *= beta_val;
}
if (col_idxs[iz + row_start] == row &&
alpha_val != zero<ValueType>()) {
values[iz + row_start] += alpha_val;
}
}
}
}

GKO_ENABLE_DEFAULT_HOST(add_scaled_identity, add_scaled_identity);


} // namespace kernel


Expand Down Expand Up @@ -2364,8 +2434,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL);
template <typename ValueType, typename IndexType>
void check_diagonal_entries_exist(
std::shared_ptr<const DpcppExecutor> exec,
const matrix::Csr<ValueType, IndexType>* const mtx,
bool& has_all_diags) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType>* const mtx, bool& has_all_diags)
{
const size_type num_subgroup = mtx->get_size()[0];
if (num_subgroup > 0) {
const size_type num_blocks =
num_subgroup / (default_block_size / config::warp_size);
array<bool> has_diags(exec, {true});
kernel::check_diagonal_entries(
num_blocks, default_block_size, 0, exec->get_queue(),
static_cast<IndexType>(
std::min(mtx->get_size()[0], mtx->get_size()[1])),
mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(),
has_diags.get_data());
has_all_diags = exec->copy_val_to_host(has_diags.get_const_data());
} else {
has_all_diags = true;
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST);
Expand All @@ -2376,7 +2462,19 @@ void add_scaled_identity(std::shared_ptr<const DpcppExecutor> exec,
const matrix::Dense<ValueType>* const alpha,
const matrix::Dense<ValueType>* const beta,
matrix::Csr<ValueType, IndexType>* const mtx)
GKO_NOT_IMPLEMENTED;
{
const auto nrows = mtx->get_size()[0];
if (nrows == 0) {
return;
}
const auto nthreads = nrows * config::warp_size;
const auto nblocks = ceildiv(nthreads, default_block_size);
kernel::add_scaled_identity(
nblocks, default_block_size, 0, exec->get_queue(),
alpha->get_const_values(), beta->get_const_values(),
static_cast<IndexType>(nrows), mtx->get_const_row_ptrs(),
mtx->get_const_col_idxs(), mtx->get_values());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL);
Expand Down
11 changes: 8 additions & 3 deletions omp/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1134,12 +1134,17 @@ void add_scaled_identity(std::shared_ptr<const OmpExecutor> exec,
const auto nrows = static_cast<IndexType>(mtx->get_size()[0]);
const auto row_ptrs = mtx->get_const_row_ptrs();
const auto vals = mtx->get_values();
const auto beta_val = beta->get_const_values()[0];
const auto alpha_val = alpha->get_const_values()[0];
#pragma omp parallel for
for (IndexType row = 0; row < nrows; row++) {
for (IndexType iz = row_ptrs[row]; iz < row_ptrs[row + 1]; iz++) {
vals[iz] *= beta->get_const_values()[0];
if (row == mtx->get_const_col_idxs()[iz]) {
vals[iz] += alpha->get_const_values()[0];
if (beta_val != one<ValueType>()) {
vals[iz] *= beta_val;
}
if (row == mtx->get_const_col_idxs()[iz] &&
alpha_val != zero<ValueType>()) {
vals[iz] += alpha_val;
}
}
}
Expand Down
6 changes: 0 additions & 6 deletions test/matrix/csr_kernels2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1311,9 +1311,6 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef)
}


#ifndef GKO_COMPILING_DPCPP


TEST_F(Csr, CanDetectMissingDiagonalEntry)
{
using T = double;
Expand Down Expand Up @@ -1359,6 +1356,3 @@ TEST_F(Csr, AddScaledIdentityToNonSquare)

GKO_ASSERT_MTX_NEAR(mtx, dmtx, r<value_type>::value);
}


#endif // GKO_COMPILING_DPCPP
Loading