Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
* put component kernels into namespace
* add tests for conversion via other executor
* add tests for rounding conversion

Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu>
Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
  • Loading branch information
4 people committed May 2, 2020
1 parent 87ca0c0 commit aab6d14
Show file tree
Hide file tree
Showing 43 changed files with 246 additions and 92 deletions.
2 changes: 1 addition & 1 deletion core/base/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace gko {
namespace conversion {


GKO_REGISTER_OPERATION(convert, convert_precision);
GKO_REGISTER_OPERATION(convert, components::convert_precision);


} // namespace conversion
Expand Down
8 changes: 8 additions & 0 deletions core/components/precision_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,38 @@ namespace kernels {


namespace omp {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace omp


namespace cuda {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace cuda


namespace reference {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace reference


namespace hip {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace hip


Expand Down
8 changes: 8 additions & 0 deletions core/components/prefix_sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,38 @@ namespace kernels {


namespace omp {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace omp


namespace cuda {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace cuda


namespace reference {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace reference


namespace hip {
namespace components {

GKO_DECLARE_ALL_AS_TEMPLATES;

} // namespace components
} // namespace hip


Expand Down
5 changes: 4 additions & 1 deletion core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace kernels {
namespace GKO_HOOK_MODULE {
namespace components {


template <typename SourceType, typename TargetType>
GKO_DECLARE_CONVERT_PRECISION_KERNEL(SourceType, TargetType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);


template <typename IndexType>
GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
Expand All @@ -84,6 +84,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);


} // namespace components


namespace dense {


Expand Down
2 changes: 2 additions & 0 deletions cuda/components/precision_conversion.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace kernels {
namespace cuda {
namespace components {


#include "common/components/precision_conversion.hpp.inc"
Expand All @@ -60,6 +61,7 @@ void convert_precision(std::shared_ptr<const DefaultExecutor> exec,
GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);


} // namespace components
} // namespace cuda
} // namespace kernels
} // namespace gko
8 changes: 4 additions & 4 deletions cuda/components/prefix_sum.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace kernels {
namespace cuda {
namespace components {


constexpr int prefix_sum_block_size = 512;
Expand All @@ -61,12 +62,11 @@ void prefix_sum(std::shared_ptr<const CudaExecutor> exec, IndexType *counts,

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);

// explicitly instantiate for size_type as well, as this is used in the SellP
// format
template void prefix_sum<size_type>(std::shared_ptr<const CudaExecutor> exec,
size_type *counts, size_type num_entries);
// instantiate for size_type as well, as this is used in the Sellp format
template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);


} // namespace components
} // namespace cuda
} // namespace kernels
} // namespace gko
6 changes: 3 additions & 3 deletions cuda/factorization/factorization_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ void add_diagonal_elements(std::shared_ptr<const CudaExecutor> exec,
return;
}

prefix_sum(exec, cuda_row_ptrs_add, row_ptrs_size);
components::prefix_sum(exec, cuda_row_ptrs_add, row_ptrs_size);
exec->synchronize();

auto total_additions =
Expand Down Expand Up @@ -162,8 +162,8 @@ void initialize_row_ptrs_l_u(
as_cuda_type(system_matrix->get_const_values()),
as_cuda_type(l_row_ptrs), as_cuda_type(u_row_ptrs));

prefix_sum(exec, l_row_ptrs, num_rows + 1);
prefix_sum(exec, u_row_ptrs, num_rows + 1);
components::prefix_sum(exec, l_row_ptrs, num_rows + 1);
components::prefix_sum(exec, u_row_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
4 changes: 2 additions & 2 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ void convert_to_sellp(std::shared_ptr<const CudaExecutor> exec,
as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths),
as_cuda_type(slice_sets));

prefix_sum(exec, slice_sets, slice_num + 1);
components::prefix_sum(exec, slice_sets, slice_num + 1);

grid_dim = ceildiv(num_rows, default_block_size);
kernel::fill_in_sellp<<<grid_dim, default_block_size>>>(
Expand Down Expand Up @@ -927,7 +927,7 @@ void convert_to_hybrid(std::shared_ptr<const CudaExecutor> exec,
num_rows, max_nnz_per_row, as_cuda_type(source->get_const_row_ptrs()),
as_cuda_type(coo_offset.get_data()));

prefix_sum(exec, coo_offset.get_data(), num_rows);
components::prefix_sum(exec, coo_offset.get_data(), num_rows);

grid_dim = ceildiv(num_rows * config::warp_size, default_block_size);
kernel::fill_in_hybrid<<<grid_dim, default_block_size>>>(
Expand Down
6 changes: 3 additions & 3 deletions cuda/matrix/dense_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ void convert_to_coo(std::shared_ptr<const CudaExecutor> exec,
auto nnz_prefix_sum = Array<size_type>(exec, num_rows);
calculate_nonzeros_per_row(exec, source, &nnz_prefix_sum);

prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows);
components::prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows);

size_type grid_dim = ceildiv(num_rows, default_block_size);

Expand Down Expand Up @@ -288,7 +288,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
num_rows, num_cols, stride, as_cuda_type(source->get_const_values()),
as_cuda_type(row_ptrs));

prefix_sum(exec, row_ptrs, num_rows + 1);
components::prefix_sum(exec, row_ptrs, num_rows + 1);

size_type grid_dim = ceildiv(num_rows, default_block_size);

Expand Down Expand Up @@ -369,7 +369,7 @@ void convert_to_sellp(std::shared_ptr<const CudaExecutor> exec,
as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths),
as_cuda_type(slice_sets));

prefix_sum(exec, slice_sets, slice_num + 1);
components::prefix_sum(exec, slice_sets, slice_num + 1);

grid_dim = ceildiv(num_rows, default_block_size);
kernel::fill_in_sellp<<<grid_dim, default_block_size>>>(
Expand Down
2 changes: 1 addition & 1 deletion cuda/matrix/ell_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
num_rows, max_nnz_per_row, stride,
as_cuda_type(source->get_const_values()), as_cuda_type(row_ptrs));

prefix_sum(exec, row_ptrs, num_rows + 1);
components::prefix_sum(exec, row_ptrs, num_rows + 1);

size_type grid_dim = ceildiv(num_rows, default_block_size);

Expand Down
2 changes: 1 addition & 1 deletion cuda/matrix/hybrid_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
num_rows, as_cuda_type(row_ptrs),
as_cuda_type(coo_row_ptrs.get_const_data()));

prefix_sum(exec, row_ptrs, num_rows + 1);
components::prefix_sum(exec, row_ptrs, num_rows + 1);

// Fill the value
grid_num = ceildiv(num_rows, default_block_size);
Expand Down
2 changes: 1 addition & 1 deletion cuda/matrix/sellp_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
grid_dim = ceildiv(num_rows + 1, default_block_size);
auto add_values = Array<IndexType>(exec, grid_dim);

prefix_sum(exec, result_row_ptrs, num_rows + 1);
components::prefix_sum(exec, result_row_ptrs, num_rows + 1);

grid_dim = ceildiv(num_rows, default_block_size);

Expand Down
46 changes: 42 additions & 4 deletions cuda/test/components/precision_conversion.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include "core/components/prefix_sum.hpp"


#include <cstring>
#include <limits>
#include <memory>
#include <random>
Expand Down Expand Up @@ -60,8 +58,12 @@ protected:
total_size(42793),
vals(ref, total_size),
cvals(ref, total_size),
vals2(ref, 1),
expected_float(ref, 1),
expected_double(ref, 1),
dvals(exec),
dcvals(exec)
dcvals(exec),
dvals2(exec)
{
auto maxval = 1e10f;
std::uniform_real_distribution<float> dist(-maxval, maxval);
Expand All @@ -71,6 +73,13 @@ protected:
}
dvals = vals;
dcvals = cvals;
gko::uint64 rawdouble = 0x4218888000889111ULL;
gko::uint32 rawfloat = 0x50c44400ULL;
gko::uint64 rawrounded = 0x4218888000000000ULL;
std::memcpy(vals2.get_data(), &rawdouble, sizeof(double));
std::memcpy(expected_float.get_data(), &rawfloat, sizeof(float));
std::memcpy(expected_double.get_data(), &rawrounded, sizeof(double));
dvals2 = vals2;
}

std::shared_ptr<gko::ReferenceExecutor> ref;
Expand All @@ -79,6 +88,10 @@ protected:
gko::size_type total_size;
gko::Array<float> vals;
gko::Array<float> dvals;
gko::Array<double> vals2;
gko::Array<double> dvals2;
gko::Array<float> expected_float;
gko::Array<double> expected_double;
gko::Array<std::complex<float>> cvals;
gko::Array<std::complex<float>> dcvals;
};
Expand All @@ -96,6 +109,18 @@ TEST_F(PrecisionConversion, ConvertsReal)
}


TEST_F(PrecisionConversion, ConvertsRealViaRef)
{
gko::Array<double> tmp{ref};
gko::Array<float> dout;

tmp = dvals;
dout = tmp;

GKO_ASSERT_ARRAY_EQ(&dvals, &dout);
}


TEST_F(PrecisionConversion, ConvertsComplex)
{
gko::Array<std::complex<double>> dtmp;
Expand All @@ -108,6 +133,19 @@ TEST_F(PrecisionConversion, ConvertsComplex)
}


TEST_F(PrecisionConversion, ConversionRounds)
{
gko::Array<float> dtmp;
gko::Array<double> dout;

dtmp = dvals2;
dout = dtmp;

GKO_ASSERT_ARRAY_EQ(&dtmp, &expected_float);
GKO_ASSERT_ARRAY_EQ(&dout, &expected_double);
}


TEST_F(PrecisionConversion, ConvertsRealFromRef)
{
gko::Array<double> dtmp;
Expand Down
6 changes: 4 additions & 2 deletions cuda/test/components/prefix_sum.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ protected:

void test(gko::size_type size)
{
gko::kernels::reference::prefix_sum(ref, vals.get_data(), size);
gko::kernels::cuda::prefix_sum(exec, dvals.get_data(), size);
gko::kernels::reference::components::prefix_sum(ref, vals.get_data(),
size);
gko::kernels::cuda::components::prefix_sum(exec, dvals.get_data(),
size);

gko::Array<index_type> dresult(ref, dvals);
auto dptr = dresult.get_const_data();
Expand Down
2 changes: 2 additions & 0 deletions hip/components/precision_conversion.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace kernels {
namespace hip {
namespace components {


#include "common/components/precision_conversion.hpp.inc"
Expand All @@ -61,6 +62,7 @@ void convert_precision(std::shared_ptr<const DefaultExecutor> exec,
GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);


} // namespace components
} // namespace hip
} // namespace kernels
} // namespace gko
8 changes: 4 additions & 4 deletions hip/components/prefix_sum.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {
namespace kernels {
namespace hip {
namespace components {


constexpr int prefix_sum_block_size = 512;
Expand All @@ -62,12 +63,11 @@ void prefix_sum(std::shared_ptr<const HipExecutor> exec, IndexType *counts,

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);

// explicitly instantiate for size_type as well, as this is used in the SellP
// format
template void prefix_sum<size_type>(std::shared_ptr<const HipExecutor> exec,
size_type *counts, size_type num_entries);
// instantiate for size_type as well, as this is used in the Sellp format
template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);


} // namespace components
} // namespace hip
} // namespace kernels
} // namespace gko
Loading

0 comments on commit aab6d14

Please sign in to comment.