Skip to content

Commit

Permalink
Updates from code review
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzgoebel committed Jul 27, 2020
1 parent 9b8c5da commit 31d99e8
Show file tree
Hide file tree
Showing 16 changed files with 342 additions and 121 deletions.
15 changes: 15 additions & 0 deletions common/matrix/diagonal_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,19 @@ __global__ __launch_bounds__(default_block_size) void convert_to_csr(
}


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void conj_transpose(
size_type size, const ValueType *__restrict__ orig_values,
ValueType *__restrict__ trans_values)
{
const auto tidx = thread::get_thread_id_flat();

if (tidx >= size) {
return;
}

trans_values[tidx] = conj(orig_values[tidx]);
}


} // namespace kernel
6 changes: 6 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL);


} // namespace diagonal

Expand Down
49 changes: 48 additions & 1 deletion core/matrix/diagonal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ GKO_REGISTER_OPERATION(right_apply_to_dense, diagonal::right_apply_to_dense);
GKO_REGISTER_OPERATION(apply_to_csr, diagonal::apply_to_csr);
GKO_REGISTER_OPERATION(right_apply_to_csr, diagonal::right_apply_to_csr);
GKO_REGISTER_OPERATION(convert_to_csr, diagonal::convert_to_csr);
GKO_REGISTER_OPERATION(conj_transpose, diagonal::conj_transpose);


} // namespace diagonal
Expand Down Expand Up @@ -123,7 +124,12 @@ std::unique_ptr<LinOp> Diagonal<ValueType, IndexType>::transpose() const
template <typename ValueType, typename IndexType>
std::unique_ptr<LinOp> Diagonal<ValueType, IndexType>::conj_transpose() const
{
return this->clone();
auto exec = this->get_executor();
auto tmp =
Diagonal<ValueType, IndexType>::create(exec, this->get_size()[0]);

exec->run(diagonal::make_conj_transpose(this, tmp.get()));
return std::move(tmp);
}


Expand All @@ -146,6 +152,47 @@ void Diagonal<ValueType, IndexType>::move_to(Csr<ValueType, IndexType> *result)
}


namespace {


template <typename MatrixType, typename MatrixData>
inline void write_impl(const MatrixType *mtx, MatrixData &data)
{
std::unique_ptr<const LinOp> op{};
const MatrixType *tmp{};
if (mtx->get_executor()->get_master() != mtx->get_executor()) {
op = mtx->clone(mtx->get_executor()->get_master());
tmp = static_cast<const MatrixType *>(op.get());
} else {
tmp = mtx;
}

data = {tmp->get_size(), {}};
const auto values = tmp->get_const_values();

for (size_type row = 0; row < data.size[0]; ++row) {
data.nonzeros.emplace_back(row, row, values[row]);
}
}


} // namespace


template <typename ValueType, typename IndexType>
void Diagonal<ValueType, IndexType>::write(mat_data &data) const
{
write_impl(this, data);
}


template <typename ValueType, typename IndexType>
void Diagonal<ValueType, IndexType>::write(mat_data32 &data) const
{
write_impl(this, data);
}


#define GKO_DECLARE_DIAGONAL_MATRIX(value_type, index_type) \
class Diagonal<value_type, index_type>
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DIAGONAL_MATRIX);
Expand Down
9 changes: 8 additions & 1 deletion core/matrix/diagonal_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ namespace kernels {
const matrix::Diagonal<ValueType, IndexType> *source, \
matrix::Csr<ValueType, IndexType> *result)

#define GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \
void conj_transpose(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Diagonal<ValueType, IndexType> *orig, \
matrix::Diagonal<ValueType, IndexType> *trans)

#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL(ValueType, IndexType); \
Expand All @@ -90,7 +95,9 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL(ValueType, IndexType)
GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType)


namespace omp {
Expand Down
18 changes: 18 additions & 0 deletions cuda/matrix/diagonal_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL);


template <typename ValueType, typename IndexType>
void conj_transpose(std::shared_ptr<const CudaExecutor> exec,
const matrix::Diagonal<ValueType, IndexType> *orig,
matrix::Diagonal<ValueType, IndexType> *trans)
{
const auto size = orig->get_size()[0];
const auto grid_dim = ceildiv(size, default_block_size);
const auto orig_values = orig->get_const_values();
auto trans_values = trans->get_values();

kernel::conj_transpose<<<grid_dim, default_block_size>>>(
size, as_cuda_type(orig_values), as_cuda_type(trans_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL);


} // namespace diagonal
} // namespace cuda
} // namespace kernels
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ TEST_F(Dense, AddsScaledDiagIsEquivalentToRef)
mat->add_scaled(alpha.get(), diag.get());
dmat->add_scaled(dalpha.get(), ddiag.get());

GKO_ASSERT_MTX_NEAR(mat, dmat, 0);
GKO_ASSERT_MTX_NEAR(mat, dmat, 1e-14);
}


Expand Down
45 changes: 41 additions & 4 deletions cuda/test/matrix/diagonal_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class Diagonal : public ::testing::Test {
using Diag = gko::matrix::Diagonal<>;
using Dense = gko::matrix::Dense<>;
using Arr = gko::Array<int>;
using ComplexDiag = gko::matrix::Diagonal<std::complex<double>>;

Diagonal() : mtx_size(532, 231), rand_engine(42) {}

Expand Down Expand Up @@ -98,6 +99,19 @@ class Diagonal : public ::testing::Test {
return diag;
}

std::unique_ptr<ComplexDiag> gen_cdiag(int size)
{
auto cdiag = ComplexDiag::create(ref, size);
auto vals = cdiag->get_values();
auto value_dist = std::normal_distribution<>(0.0, 1.0);
for (int i = 0; i < size; i++) {
vals[i] = std::complex<double>{
gko::test::detail::get_rand_value<std::complex<double>>(
value_dist, rand_engine)};
}
return cdiag;
}

void set_up_apply_data()
{
diag = gen_diag(mtx_size[0]);
Expand Down Expand Up @@ -129,6 +143,13 @@ class Diagonal : public ::testing::Test {
csrresult2->copy_from(csrexpected2.get());
}

void set_up_complex_data()
{
cdiag = gen_cdiag(mtx_size[0]);
dcdiag = ComplexDiag::create(cuda);
dcdiag->copy_from(cdiag.get());
}

std::shared_ptr<gko::ReferenceExecutor> ref;
std::shared_ptr<const gko::CudaExecutor> cuda;

Expand All @@ -137,6 +158,8 @@ class Diagonal : public ::testing::Test {

std::unique_ptr<Diag> diag;
std::unique_ptr<Diag> ddiag;
std::unique_ptr<ComplexDiag> cdiag;
std::unique_ptr<ComplexDiag> dcdiag;

std::unique_ptr<Dense> dense1;
std::unique_ptr<Dense> dense2;
Expand Down Expand Up @@ -164,7 +187,7 @@ TEST_F(Diagonal, ApplyToDenseIsEquivalentToRef)
diag->apply(dense1.get(), denseexpected1.get());
ddiag->apply(ddense1.get(), denseresult1.get());

GKO_ASSERT_MTX_NEAR(denseexpected1, denseresult1, 0);
GKO_ASSERT_MTX_NEAR(denseexpected1, denseresult1, 1e-14);
}


Expand All @@ -175,7 +198,7 @@ TEST_F(Diagonal, RightApplyToDenseIsEquivalentToRef)
diag->rapply(dense2.get(), denseexpected2.get());
ddiag->rapply(ddense2.get(), denseresult2.get());

GKO_ASSERT_MTX_NEAR(denseexpected2, denseresult2, 0);
GKO_ASSERT_MTX_NEAR(denseexpected2, denseresult2, 1e-14);
}


Expand All @@ -186,7 +209,7 @@ TEST_F(Diagonal, ApplyToCsrIsEquivalentToRef)
diag->apply(csr1.get(), csrexpected1.get());
ddiag->apply(dcsr1.get(), csrresult1.get());

GKO_ASSERT_MTX_NEAR(csrexpected1, csrresult1, 0);
GKO_ASSERT_MTX_NEAR(csrexpected1, csrresult1, 1e-14);
}


Expand All @@ -197,7 +220,7 @@ TEST_F(Diagonal, RightApplyToCsrIsEquivalentToRef)
diag->rapply(csr2.get(), csrexpected2.get());
ddiag->rapply(dcsr2.get(), csrresult2.get());

GKO_ASSERT_MTX_NEAR(csrexpected2, csrresult2, 0);
GKO_ASSERT_MTX_NEAR(csrexpected2, csrresult2, 1e-14);
}


Expand All @@ -211,4 +234,18 @@ TEST_F(Diagonal, ConvertToCsrIsEquivalentToRef)
GKO_ASSERT_MTX_NEAR(csr1, dcsr1, 0);
}


TEST_F(Diagonal, ConjTransposeIsEquivalentToRef)
{
set_up_complex_data();

auto trans = cdiag->conj_transpose();
auto trans_diag = static_cast<ComplexDiag *>(trans.get());
auto dtrans = dcdiag->conj_transpose();
auto dtrans_diag = static_cast<ComplexDiag *>(dtrans.get());

GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0);
}


} // namespace
32 changes: 25 additions & 7 deletions hip/matrix/diagonal_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ void apply_to_csr(std::shared_ptr<const HipExecutor> exec,
const auto grid_dim =
ceildiv(num_rows * config::warp_size, default_block_size);
hipLaunchKernelGGL(kernel::apply_to_csr, grid_dim, default_block_size, 0, 0,
num_rows, as_cuda_type(diag_values),
as_cuda_type(csr_row_ptrs), as_cuda_type(csr_values));
num_rows, as_hip_type(diag_values),
as_hip_type(csr_row_ptrs), as_hip_type(csr_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand All @@ -154,8 +154,8 @@ void right_apply_to_csr(std::shared_ptr<const HipExecutor> exec,

const auto grid_dim = ceildiv(num_nnz, default_block_size);
hipLaunchKernelGGL(kernel::right_apply_to_csr, grid_dim, default_block_size,
0, 0, num_nnz, as_cuda_type(diag_values),
as_cuda_type(csr_col_idxs), as_cuda_type(csr_values));
0, 0, num_nnz, as_hip_type(diag_values),
as_hip_type(csr_col_idxs), as_hip_type(csr_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand All @@ -176,15 +176,33 @@ void convert_to_csr(std::shared_ptr<const HipExecutor> exec,
auto csr_values = result->get_values();

hipLaunchKernelGGL(kernel::convert_to_csr, grid_dim, default_block_size, 0,
0, size, as_cuda_type(diag_values),
as_cuda_type(row_ptrs), as_cuda_type(col_idxs),
as_cuda_type(csr_values));
0, size, as_hip_type(diag_values), as_hip_type(row_ptrs),
as_hip_type(col_idxs), as_hip_type(csr_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL);


template <typename ValueType, typename IndexType>
void conj_transpose(std::shared_ptr<const HipExecutor> exec,
const matrix::Diagonal<ValueType, IndexType> *orig,
matrix::Diagonal<ValueType, IndexType> *trans)
{
const auto size = orig->get_size()[0];
const auto grid_dim = ceildiv(size, default_block_size);
const auto orig_values = orig->get_const_values();
auto trans_values = trans->get_values();

hipLaunchKernelGGL(kernel::conj_transpose, grid_dim, default_block_size, 0,
0, size, as_hip_type(orig_values),
as_hip_type(trans_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL);


} // namespace diagonal
} // namespace hip
} // namespace kernels
Expand Down
2 changes: 1 addition & 1 deletion hip/test/matrix/dense_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ TEST_F(Dense, AddsScaledDiagIsEquivalentToRef)
mat->add_scaled(alpha.get(), diag.get());
dmat->add_scaled(dalpha.get(), ddiag.get());

GKO_ASSERT_MTX_NEAR(mat, dmat, 0);
GKO_ASSERT_MTX_NEAR(mat, dmat, 1e-14);
}


Expand Down
Loading

0 comments on commit 31d99e8

Please sign in to comment.