rm redundant guard, keep highest prec on ref/omp
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
yhmtsai and upsj committed Feb 15, 2022
1 parent 740fbe9 commit ff428ce
Showing 6 changed files with 69 additions and 87 deletions.
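
The commit bundles two independent cleanups. On the CUDA, HIP, and DPC++ backends it removes a per-branch `grid.x > 0 && grid.y > 0` guard around the kernel launches that the enclosing function's early return already covers (see the note after the first diff). On the reference and OMP backends it accumulates each SpMV entry in the `highest_precision` of the input, output, and matrix value types, narrowing to `OutputValueType` only once per entry. A minimal compile-time sketch of the precision idea, using a hypothetical two-type stand-in for Ginkgo's three-parameter `highest_precision` trait:

#include <type_traits>

// Hypothetical stand-in for gko::highest_precision, reduced to two real
// types: pick the wider one so a float * double product is summed in
// double. Ginkgo's actual trait takes three types and also covers complex.
template <typename T1, typename T2>
using widest_type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>;

static_assert(std::is_same_v<widest_type<float, double>, double>,
              "accumulation happens in the wider type");

int main() { return 0; }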
27 changes: 9 additions & 18 deletions cuda/matrix/sparsity_csr_kernels.cu
@@ -118,25 +118,16 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subwarp_size>
-                <<<grid, block, 0, 0>>>(
-                    a->get_size()[0], as_cuda_type(a->get_const_value()),
-                    a->get_const_col_idxs(),
-                    as_cuda_type(a->get_const_row_ptrs()),
-                    acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals));
-        }
+        kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(
+            a->get_size()[0], as_cuda_type(a->get_const_value()),
+            a->get_const_col_idxs(), as_cuda_type(a->get_const_row_ptrs()),
+            acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals));
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subwarp_size>
-                <<<grid, block, 0, 0>>>(
-                    a->get_size()[0], as_cuda_type(alpha->get_const_values()),
-                    as_cuda_type(a->get_const_value()), a->get_const_col_idxs(),
-                    as_cuda_type(a->get_const_row_ptrs()),
-                    acc::as_cuda_range(b_vals),
-                    as_cuda_type(beta->get_const_values()),
-                    acc::as_cuda_range(c_vals));
-        }
+        kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(
+            a->get_size()[0], as_cuda_type(alpha->get_const_values()),
+            as_cuda_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_cuda_type(a->get_const_row_ptrs()), acc::as_cuda_range(b_vals),
+            as_cuda_type(beta->get_const_values()), acc::as_cuda_range(c_vals));
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
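Why the deleted guards were redundant: the leading context of each hunk ends with a `return;`, an early exit presumably taken exactly when the launch grid would be empty, so both launch branches already run only with non-empty grids. A control-flow sketch under that assumption (names and the exact early-out condition are illustrative, not the Ginkgo source); the HIP and DPC++ hunks below remove the same guard:

// Illustrative only: a minimal check-then-launch structure.
struct launch_grid {
    unsigned x;
    unsigned y;
};

void classical_spmv_sketch(launch_grid grid)
{
    if (grid.x == 0 || grid.y == 0) {
        return;  // assumed early exit matching the `return;` context line
    }
    // From here on, grid.x > 0 && grid.y > 0 always holds, so wrapping each
    // launch in `if (grid.x > 0 && grid.y > 0)` added nothing:
    // kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(...);
}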
22 changes: 9 additions & 13 deletions dpcpp/matrix/sparsity_csr_kernels.dp.cpp
@@ -235,20 +235,16 @@ void classical_spmv(syn::value_list<int, subgroup_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subgroup_size>(
-                grid, block, 0, exec->get_queue(), a->get_size()[0],
-                a->get_const_value(), a->get_const_col_idxs(),
-                a->get_const_row_ptrs(), b_vals, c_vals);
-        }
+        kernel::abstract_classical_spmv<subgroup_size>(
+            grid, block, 0, exec->get_queue(), a->get_size()[0],
+            a->get_const_value(), a->get_const_col_idxs(),
+            a->get_const_row_ptrs(), b_vals, c_vals);
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subgroup_size>(
-                grid, block, 0, exec->get_queue(), a->get_size()[0],
-                alpha->get_const_values(), a->get_const_value(),
-                a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals,
-                beta->get_const_values(), c_vals);
-        }
+        kernel::abstract_classical_spmv<subgroup_size>(
+            grid, block, 0, exec->get_queue(), a->get_size()[0],
+            alpha->get_const_values(), a->get_const_value(),
+            a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals,
+            beta->get_const_values(), c_vals);
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
31 changes: 13 additions & 18 deletions hip/matrix/sparsity_csr_kernels.hip.cpp
@@ -121,25 +121,20 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            hipLaunchKernelGGL(
-                HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
-                grid, block, 0, 0, a->get_size()[0],
-                as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
-                as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
-                acc::as_hip_range(c_vals));
-        }
+        hipLaunchKernelGGL(
+            HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
+            grid, block, 0, 0, a->get_size()[0],
+            as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
+            acc::as_hip_range(c_vals));
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            hipLaunchKernelGGL(
-                HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
-                grid, block, 0, 0, a->get_size()[0],
-                as_hip_type(alpha->get_const_values()),
-                as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
-                as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
-                as_hip_type(beta->get_const_values()),
-                acc::as_hip_range(c_vals));
-        }
+        hipLaunchKernelGGL(
+            HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
+            grid, block, 0, 0, a->get_size()[0],
+            as_hip_type(alpha->get_const_values()),
+            as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
+            as_hip_type(beta->get_const_values()), acc::as_hip_range(c_vals));
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
42 changes: 22 additions & 20 deletions omp/matrix/sparsity_csr_kernels.cpp
@@ -70,22 +70,22 @@ void spmv(std::shared_ptr<const OmpExecutor> exec,
           const matrix::Dense<InputValueType>* b,
           matrix::Dense<OutputValueType>* c)
 {
+    using arithmetic_type =
+        highest_precision<InputValueType, OutputValueType, MatrixValueType>;
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
-    const auto val = static_cast<OutputValueType>(a->get_const_value()[0]);
+    const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
 #pragma omp parallel for
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) = zero<OutputValueType>();
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) +=
-                    val * static_cast<OutputValueType>(b->at(col, j));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(temp_val);
         }
     }
 }
@@ -103,24 +103,26 @@ void advanced_spmv(std::shared_ptr<const OmpExecutor> exec,
                    const matrix::Dense<OutputValueType>* beta,
                    matrix::Dense<OutputValueType>* c)
 {
+    using arithmetic_type =
+        highest_precision<InputValueType, OutputValueType, MatrixValueType>;
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
-    const auto valpha = static_cast<OutputValueType>(alpha->at(0, 0));
-    const auto vbeta = beta->at(0, 0);
-    const auto val = static_cast<OutputValueType>(a->get_const_value()[0]);
+    const auto valpha = static_cast<arithmetic_type>(alpha->at(0, 0));
+    const auto vbeta = static_cast<arithmetic_type>(beta->at(0, 0));
+    const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
 #pragma omp parallel for
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) *= vbeta;
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) +=
-                    valpha * val * static_cast<OutputValueType>(b->at(col, j));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(
+                vbeta * static_cast<arithmetic_type>(c->at(row, j)) +
+                valpha * temp_val);
         }
     }
 }
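Note the loop restructuring that accompanies the precision change: the old OMP code zeroed a row of `c`, then for every stored entry swept all output columns, updating `c` in `OutputValueType`; the new code fixes an output column `j`, accumulates the row's dot product in a single `arithmetic_type` scalar, and writes it back once. A self-contained sketch of that pattern under simplifying assumptions (one right-hand side, `std::vector` storage, hypothetical names):

#include <cstddef>
#include <vector>

// Hypothetical stand-alone variant of the adopted pattern. SparsityCsr
// stores one shared value `val` for all entries; the per-row sum stays in
// ArithmeticType and is narrowed to OutputValueType exactly once.
template <typename InputValueType, typename OutputValueType,
          typename ArithmeticType>
void sparsity_spmv_sketch(const std::vector<int>& row_ptrs,
                          const std::vector<int>& col_idxs,
                          ArithmeticType val,
                          const std::vector<InputValueType>& b,
                          std::vector<OutputValueType>& c)
{
    const auto num_rows = static_cast<std::ptrdiff_t>(c.size());
#pragma omp parallel for
    for (std::ptrdiff_t row = 0; row < num_rows; ++row) {
        auto temp_val = ArithmeticType{};  // plays the role of gko::zero<>()
        for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) {
            temp_val += val * static_cast<ArithmeticType>(b[col_idxs[k]]);
        }
        c[row] = static_cast<OutputValueType>(temp_val);  // one narrowing cast
    }
}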
32 changes: 15 additions & 17 deletions reference/matrix/sparsity_csr_kernels.cpp
@@ -75,15 +75,13 @@ void spmv(std::shared_ptr<const ReferenceExecutor> exec,
 
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) = zero<OutputValueType>();
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) += static_cast<OutputValueType>(
-                    val * static_cast<arithmetic_type>(b->at(col, j)));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(temp_val);
         }
     }
 }
@@ -107,20 +105,20 @@ void advanced_spmv(std::shared_ptr<const ReferenceExecutor> exec,
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
     const auto valpha = static_cast<arithmetic_type>(alpha->at(0, 0));
-    const auto vbeta = static_cast<OutputValueType>(beta->at(0, 0));
+    const auto vbeta = static_cast<arithmetic_type>(beta->at(0, 0));
     const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) *= vbeta;
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) += static_cast<OutputValueType>(
-                    valpha * val * static_cast<arithmetic_type>(b->at(col, j)));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(
+                vbeta * static_cast<arithmetic_type>(c->at(row, j)) +
+                valpha * temp_val);
         }
     }
 }
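The reference `advanced_spmv` additionally keeps `vbeta` in `arithmetic_type` (it was previously narrowed to `OutputValueType`) and folds the `beta * c` term into the high-precision expression, so the full update `c = beta * c + alpha * (A * b)` incurs a single rounding to `OutputValueType` instead of scaling `c` in output precision up front. A minimal sketch isolating that final update (hypothetical helper, not a Ginkgo function):

// Everything on the right-hand side is evaluated in ArithmeticType; the
// only narrowing cast is the outermost one.
template <typename OutputValueType, typename ArithmeticType>
OutputValueType advanced_update_sketch(ArithmeticType valpha,
                                       ArithmeticType vbeta,
                                       ArithmeticType temp_val,
                                       OutputValueType c_old)
{
    return static_cast<OutputValueType>(
        vbeta * static_cast<ArithmeticType>(c_old) + valpha * temp_val);
}

// Usage, e.g.: float c_new = advanced_update_sketch(2.0, 0.5, dot, c_old);
// with ArithmeticType = double deduced from the scalar arguments.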
2 changes: 1 addition & 1 deletion test/matrix/matrix.cpp
@@ -361,7 +361,7 @@ struct SparsityCsr
         using entry_type =
             gko::matrix_data<matrix_value_type, int>::nonzero_type;
         for (auto& entry : data.nonzeros) {
-            entry = entry_type{entry.row, entry.column, matrix_value_type{1}};
+            entry.value = gko::one<matrix_value_type>();
         }
     }
 };
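For context on the one-line test change: `SparsityCsr` carries a single shared value for all stored entries, so the fixture normalizes every generated nonzero to one. Assigning the `value` member in place is equivalent to rebuilding the whole entry but no longer restates `row` and `column`. A toy illustration with a hypothetical local struct standing in for `gko::matrix_data<...>::nonzero_type`:

#include <cassert>

// Hypothetical stand-in for the entry type used by the test.
struct entry_sketch {
    int row;
    int column;
    double value;
};

int main()
{
    entry_sketch entry{3, 7, 2.5};
    entry = entry_sketch{entry.row, entry.column, 1.0};  // old: rebuild all
    entry.value = 1.0;                                   // new: value only
    assert(entry.row == 3 && entry.column == 7 && entry.value == 1.0);
    return 0;
}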
