review updates

* put component kernels into namespace * add tests for conversion via other executor * add tests for rounding conversion Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu> Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com> Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
ginkgo-project · May 2, 2020 · aab6d14 · aab6d14
1 parent 87ca0c0
commit aab6d14
Show file tree

Hide file tree

Showing 43 changed files with 246 additions and 92 deletions.
diff --git a/core/base/array.cpp b/core/base/array.cpp
@@ -43,7 +43,7 @@ namespace gko {
 namespace conversion {
 
 
-GKO_REGISTER_OPERATION(convert, convert_precision);
+GKO_REGISTER_OPERATION(convert, components::convert_precision);
 
 
 }  // namespace conversion

diff --git a/core/components/precision_conversion.hpp b/core/components/precision_conversion.hpp
@@ -58,30 +58,38 @@ namespace kernels {
 
 
 namespace omp {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace omp
 
 
 namespace cuda {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace cuda
 
 
 namespace reference {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace reference
 
 
 namespace hip {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace hip
 
 

diff --git a/core/components/prefix_sum.hpp b/core/components/prefix_sum.hpp
@@ -56,30 +56,38 @@ namespace kernels {
 
 
 namespace omp {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace omp
 
 
 namespace cuda {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace cuda
 
 
 namespace reference {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace reference
 
 
 namespace hip {
+namespace components {
 
 GKO_DECLARE_ALL_AS_TEMPLATES;
 
+}  // namespace components
 }  // namespace hip
 
 

diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp
@@ -68,14 +68,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 namespace kernels {
 namespace GKO_HOOK_MODULE {
+namespace components {
 
 
 template <typename SourceType, typename TargetType>
 GKO_DECLARE_CONVERT_PRECISION_KERNEL(SourceType, TargetType)
 GKO_NOT_COMPILED(GKO_HOOK_MODULE);
 GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);
 
-
 template <typename IndexType>
 GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType)
 GKO_NOT_COMPILED(GKO_HOOK_MODULE);
@@ -84,6 +84,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
 template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);
 
 
+}  // namespace components
+
+
 namespace dense {
 
 

diff --git a/cuda/components/precision_conversion.cu b/cuda/components/precision_conversion.cu
@@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 namespace kernels {
 namespace cuda {
+namespace components {
 
 
 #include "common/components/precision_conversion.hpp.inc"
@@ -60,6 +61,7 @@ void convert_precision(std::shared_ptr<const DefaultExecutor> exec,
 GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);
 
 
+}  // namespace components
 }  // namespace cuda
 }  // namespace kernels
 }  // namespace gko
diff --git a/cuda/components/prefix_sum.cu b/cuda/components/prefix_sum.cu
@@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 namespace kernels {
 namespace cuda {
+namespace components {
 
 
 constexpr int prefix_sum_block_size = 512;
@@ -61,12 +62,11 @@ void prefix_sum(std::shared_ptr<const CudaExecutor> exec, IndexType *counts,
 
 GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
 
-// explicitly instantiate for size_type as well, as this is used in the SellP
-// format
-template void prefix_sum<size_type>(std::shared_ptr<const CudaExecutor> exec,
-                                    size_type *counts, size_type num_entries);
+// instantiate for size_type as well, as this is used in the Sellp format
+template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);
 
 
+}  // namespace components
 }  // namespace cuda
 }  // namespace kernels
 }  // namespace gko
diff --git a/cuda/factorization/factorization_kernels.cu b/cuda/factorization/factorization_kernels.cu
@@ -110,7 +110,7 @@ void add_diagonal_elements(std::shared_ptr<const CudaExecutor> exec,
         return;
     }
 
-    prefix_sum(exec, cuda_row_ptrs_add, row_ptrs_size);
+    components::prefix_sum(exec, cuda_row_ptrs_add, row_ptrs_size);
     exec->synchronize();
 
     auto total_additions =
@@ -162,8 +162,8 @@ void initialize_row_ptrs_l_u(
         as_cuda_type(system_matrix->get_const_values()),
         as_cuda_type(l_row_ptrs), as_cuda_type(u_row_ptrs));
 
-    prefix_sum(exec, l_row_ptrs, num_rows + 1);
-    prefix_sum(exec, u_row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, l_row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, u_row_ptrs, num_rows + 1);
 }
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(

diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu
@@ -676,7 +676,7 @@ void convert_to_sellp(std::shared_ptr<const CudaExecutor> exec,
         as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths),
         as_cuda_type(slice_sets));
 
-    prefix_sum(exec, slice_sets, slice_num + 1);
+    components::prefix_sum(exec, slice_sets, slice_num + 1);
 
     grid_dim = ceildiv(num_rows, default_block_size);
     kernel::fill_in_sellp<<<grid_dim, default_block_size>>>(
@@ -927,7 +927,7 @@ void convert_to_hybrid(std::shared_ptr<const CudaExecutor> exec,
         num_rows, max_nnz_per_row, as_cuda_type(source->get_const_row_ptrs()),
         as_cuda_type(coo_offset.get_data()));
 
-    prefix_sum(exec, coo_offset.get_data(), num_rows);
+    components::prefix_sum(exec, coo_offset.get_data(), num_rows);
 
     grid_dim = ceildiv(num_rows * config::warp_size, default_block_size);
     kernel::fill_in_hybrid<<<grid_dim, default_block_size>>>(

diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu
@@ -252,7 +252,7 @@ void convert_to_coo(std::shared_ptr<const CudaExecutor> exec,
     auto nnz_prefix_sum = Array<size_type>(exec, num_rows);
     calculate_nonzeros_per_row(exec, source, &nnz_prefix_sum);
 
-    prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows);
+    components::prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows);
 
     size_type grid_dim = ceildiv(num_rows, default_block_size);
 
@@ -288,7 +288,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
         num_rows, num_cols, stride, as_cuda_type(source->get_const_values()),
         as_cuda_type(row_ptrs));
 
-    prefix_sum(exec, row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, row_ptrs, num_rows + 1);
 
     size_type grid_dim = ceildiv(num_rows, default_block_size);
 
@@ -369,7 +369,7 @@ void convert_to_sellp(std::shared_ptr<const CudaExecutor> exec,
         as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths),
         as_cuda_type(slice_sets));
 
-    prefix_sum(exec, slice_sets, slice_num + 1);
+    components::prefix_sum(exec, slice_sets, slice_num + 1);
 
     grid_dim = ceildiv(num_rows, default_block_size);
     kernel::fill_in_sellp<<<grid_dim, default_block_size>>>(

diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu
@@ -308,7 +308,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
         num_rows, max_nnz_per_row, stride,
         as_cuda_type(source->get_const_values()), as_cuda_type(row_ptrs));
 
-    prefix_sum(exec, row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, row_ptrs, num_rows + 1);
 
     size_type grid_dim = ceildiv(num_rows, default_block_size);
 

diff --git a/cuda/matrix/hybrid_kernels.cu b/cuda/matrix/hybrid_kernels.cu
@@ -129,7 +129,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
         num_rows, as_cuda_type(row_ptrs),
         as_cuda_type(coo_row_ptrs.get_const_data()));
 
-    prefix_sum(exec, row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, row_ptrs, num_rows + 1);
 
     // Fill the value
     grid_num = ceildiv(num_rows, default_block_size);

diff --git a/cuda/matrix/sellp_kernels.cu b/cuda/matrix/sellp_kernels.cu
@@ -176,7 +176,7 @@ void convert_to_csr(std::shared_ptr<const CudaExecutor> exec,
     grid_dim = ceildiv(num_rows + 1, default_block_size);
     auto add_values = Array<IndexType>(exec, grid_dim);
 
-    prefix_sum(exec, result_row_ptrs, num_rows + 1);
+    components::prefix_sum(exec, result_row_ptrs, num_rows + 1);
 
     grid_dim = ceildiv(num_rows, default_block_size);
 

diff --git a/cuda/test/components/precision_conversion.cu b/cuda/test/components/precision_conversion.cu
@@ -30,9 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
-#include "core/components/prefix_sum.hpp"
-
-
+#include <cstring>
 #include <limits>
 #include <memory>
 #include <random>
@@ -60,8 +58,12 @@ protected:
           total_size(42793),
           vals(ref, total_size),
           cvals(ref, total_size),
+          vals2(ref, 1),
+          expected_float(ref, 1),
+          expected_double(ref, 1),
           dvals(exec),
-          dcvals(exec)
+          dcvals(exec),
+          dvals2(exec)
     {
         auto maxval = 1e10f;
         std::uniform_real_distribution<float> dist(-maxval, maxval);
@@ -71,6 +73,13 @@ protected:
         }
         dvals = vals;
         dcvals = cvals;
+        gko::uint64 rawdouble = 0x4218888000889111ULL;
+        gko::uint32 rawfloat = 0x50c44400ULL;
+        gko::uint64 rawrounded = 0x4218888000000000ULL;
+        std::memcpy(vals2.get_data(), &rawdouble, sizeof(double));
+        std::memcpy(expected_float.get_data(), &rawfloat, sizeof(float));
+        std::memcpy(expected_double.get_data(), &rawrounded, sizeof(double));
+        dvals2 = vals2;
     }
 
     std::shared_ptr<gko::ReferenceExecutor> ref;
@@ -79,6 +88,10 @@ protected:
     gko::size_type total_size;
     gko::Array<float> vals;
     gko::Array<float> dvals;
+    gko::Array<double> vals2;
+    gko::Array<double> dvals2;
+    gko::Array<float> expected_float;
+    gko::Array<double> expected_double;
     gko::Array<std::complex<float>> cvals;
     gko::Array<std::complex<float>> dcvals;
 };
@@ -96,6 +109,18 @@ TEST_F(PrecisionConversion, ConvertsReal)
 }
 
 
+TEST_F(PrecisionConversion, ConvertsRealViaRef)
+{
+    gko::Array<double> tmp{ref};
+    gko::Array<float> dout;
+
+    tmp = dvals;
+    dout = tmp;
+
+    GKO_ASSERT_ARRAY_EQ(&dvals, &dout);
+}
+
+
 TEST_F(PrecisionConversion, ConvertsComplex)
 {
     gko::Array<std::complex<double>> dtmp;
@@ -108,6 +133,19 @@ TEST_F(PrecisionConversion, ConvertsComplex)
 }
 
 
+TEST_F(PrecisionConversion, ConversionRounds)
+{
+    gko::Array<float> dtmp;
+    gko::Array<double> dout;
+
+    dtmp = dvals2;
+    dout = dtmp;
+
+    GKO_ASSERT_ARRAY_EQ(&dtmp, &expected_float);
+    GKO_ASSERT_ARRAY_EQ(&dout, &expected_double);
+}
+
+
 TEST_F(PrecisionConversion, ConvertsRealFromRef)
 {
     gko::Array<double> dtmp;

diff --git a/cuda/test/components/prefix_sum.cu b/cuda/test/components/prefix_sum.cu
@@ -70,8 +70,10 @@ protected:
 
     void test(gko::size_type size)
     {
-        gko::kernels::reference::prefix_sum(ref, vals.get_data(), size);
-        gko::kernels::cuda::prefix_sum(exec, dvals.get_data(), size);
+        gko::kernels::reference::components::prefix_sum(ref, vals.get_data(),
+                                                        size);
+        gko::kernels::cuda::components::prefix_sum(exec, dvals.get_data(),
+                                                   size);
 
         gko::Array<index_type> dresult(ref, dvals);
         auto dptr = dresult.get_const_data();

diff --git a/hip/components/precision_conversion.hip.cpp b/hip/components/precision_conversion.hip.cpp
@@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 namespace kernels {
 namespace hip {
+namespace components {
 
 
 #include "common/components/precision_conversion.hpp.inc"
@@ -61,6 +62,7 @@ void convert_precision(std::shared_ptr<const DefaultExecutor> exec,
 GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);
 
 
+}  // namespace components
 }  // namespace hip
 }  // namespace kernels
 }  // namespace gko
diff --git a/hip/components/prefix_sum.hip.cpp b/hip/components/prefix_sum.hip.cpp
@@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 namespace kernels {
 namespace hip {
+namespace components {
 
 
 constexpr int prefix_sum_block_size = 512;
@@ -62,12 +63,11 @@ void prefix_sum(std::shared_ptr<const HipExecutor> exec, IndexType *counts,
 
 GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
 
-// explicitly instantiate for size_type as well, as this is used in the SellP
-// format
-template void prefix_sum<size_type>(std::shared_ptr<const HipExecutor> exec,
-                                    size_type *counts, size_type num_entries);
+// instantiate for size_type as well, as this is used in the Sellp format
+template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);
 
 
+}  // namespace components
 }  // namespace hip
 }  // namespace kernels
 }  // namespace gko