rm redundant guard, keep highest prec on ref/omp
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
yhmtsai and upsj committed Feb 15, 2022
1 parent 740fbe9 commit ff428ce
Showing 6 changed files with 69 additions and 87 deletions.
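
The commit bundles two independent cleanups. On the CUDA, HIP, and DPC++ backends it removes a per-branch `grid.x > 0 && grid.y > 0` guard around the kernel launches that the enclosing function's early return already covers (see the note after the first diff). On the reference and OMP backends it accumulates each SpMV entry in the `highest_precision` of the input, output, and matrix value types, narrowing to `OutputValueType` only once per entry. A minimal compile-time sketch of the precision idea, using a hypothetical two-type stand-in for Ginkgo's three-parameter `highest_precision` trait:

#include <type_traits>

// Hypothetical stand-in for gko::highest_precision, reduced to two real
// types: pick the wider one so a float * double product is summed in
// double. Ginkgo's actual trait takes three types and also covers complex.
template <typename T1, typename T2>
using widest_type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>;

static_assert(std::is_same_v<widest_type<float, double>, double>,
              "accumulation happens in the wider type");

int main() { return 0; }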
27 changes: 9 additions & 18 deletions cuda/matrix/sparsity_csr_kernels.cu
@@ -118,25 +118,16 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subwarp_size>
-                <<<grid, block, 0, 0>>>(
-                    a->get_size()[0], as_cuda_type(a->get_const_value()),
-                    a->get_const_col_idxs(),
-                    as_cuda_type(a->get_const_row_ptrs()),
-                    acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals));
-        }
+        kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(
+            a->get_size()[0], as_cuda_type(a->get_const_value()),
+            a->get_const_col_idxs(), as_cuda_type(a->get_const_row_ptrs()),
+            acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals));
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subwarp_size>
-                <<<grid, block, 0, 0>>>(
-                    a->get_size()[0], as_cuda_type(alpha->get_const_values()),
-                    as_cuda_type(a->get_const_value()), a->get_const_col_idxs(),
-                    as_cuda_type(a->get_const_row_ptrs()),
-                    acc::as_cuda_range(b_vals),
-                    as_cuda_type(beta->get_const_values()),
-                    acc::as_cuda_range(c_vals));
-        }
+        kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(
+            a->get_size()[0], as_cuda_type(alpha->get_const_values()),
+            as_cuda_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_cuda_type(a->get_const_row_ptrs()), acc::as_cuda_range(b_vals),
+            as_cuda_type(beta->get_const_values()), acc::as_cuda_range(c_vals));
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
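Why the deleted guards were redundant: the leading context of each hunk ends with a `return;`, an early exit presumably taken exactly when the launch grid would be empty, so both launch branches already run only with non-empty grids. A control-flow sketch under that assumption (names and the exact early-out condition are illustrative, not the Ginkgo source); the HIP and DPC++ hunks below remove the same guard:

// Illustrative only: a minimal check-then-launch structure.
struct launch_grid {
    unsigned x;
    unsigned y;
};

void classical_spmv_sketch(launch_grid grid)
{
    if (grid.x == 0 || grid.y == 0) {
        return;  // assumed early exit matching the `return;` context line
    }
    // From here on, grid.x > 0 && grid.y > 0 always holds, so wrapping each
    // launch in `if (grid.x > 0 && grid.y > 0)` added nothing:
    // kernel::abstract_classical_spmv<subwarp_size><<<grid, block, 0, 0>>>(...);
}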
22 changes: 9 additions & 13 deletions dpcpp/matrix/sparsity_csr_kernels.dp.cpp
@@ -235,20 +235,16 @@ void classical_spmv(syn::value_list<int, subgroup_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subgroup_size>(
-                grid, block, 0, exec->get_queue(), a->get_size()[0],
-                a->get_const_value(), a->get_const_col_idxs(),
-                a->get_const_row_ptrs(), b_vals, c_vals);
-        }
+        kernel::abstract_classical_spmv<subgroup_size>(
+            grid, block, 0, exec->get_queue(), a->get_size()[0],
+            a->get_const_value(), a->get_const_col_idxs(),
+            a->get_const_row_ptrs(), b_vals, c_vals);
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            kernel::abstract_classical_spmv<subgroup_size>(
-                grid, block, 0, exec->get_queue(), a->get_size()[0],
-                alpha->get_const_values(), a->get_const_value(),
-                a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals,
-                beta->get_const_values(), c_vals);
-        }
+        kernel::abstract_classical_spmv<subgroup_size>(
+            grid, block, 0, exec->get_queue(), a->get_size()[0],
+            alpha->get_const_values(), a->get_const_value(),
+            a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals,
+            beta->get_const_values(), c_vals);
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
31 changes: 13 additions & 18 deletions hip/matrix/sparsity_csr_kernels.hip.cpp
@@ -121,25 +121,20 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
         return;
     }
     if (alpha == nullptr && beta == nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            hipLaunchKernelGGL(
-                HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
-                grid, block, 0, 0, a->get_size()[0],
-                as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
-                as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
-                acc::as_hip_range(c_vals));
-        }
+        hipLaunchKernelGGL(
+            HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
+            grid, block, 0, 0, a->get_size()[0],
+            as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
+            acc::as_hip_range(c_vals));
     } else if (alpha != nullptr && beta != nullptr) {
-        if (grid.x > 0 && grid.y > 0) {
-            hipLaunchKernelGGL(
-                HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
-                grid, block, 0, 0, a->get_size()[0],
-                as_hip_type(alpha->get_const_values()),
-                as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
-                as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
-                as_hip_type(beta->get_const_values()),
-                acc::as_hip_range(c_vals));
-        }
+        hipLaunchKernelGGL(
+            HIP_KERNEL_NAME(kernel::abstract_classical_spmv<subwarp_size>),
+            grid, block, 0, 0, a->get_size()[0],
+            as_hip_type(alpha->get_const_values()),
+            as_hip_type(a->get_const_value()), a->get_const_col_idxs(),
+            as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals),
+            as_hip_type(beta->get_const_values()), acc::as_hip_range(c_vals));
     } else {
         GKO_KERNEL_NOT_FOUND;
     }
42 changes: 22 additions & 20 deletions omp/matrix/sparsity_csr_kernels.cpp
@@ -70,22 +70,22 @@ void spmv(std::shared_ptr<const OmpExecutor> exec,
           const matrix::Dense<InputValueType>* b,
           matrix::Dense<OutputValueType>* c)
 {
+    using arithmetic_type =
+        highest_precision<InputValueType, OutputValueType, MatrixValueType>;
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
-    const auto val = static_cast<OutputValueType>(a->get_const_value()[0]);
+    const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
 #pragma omp parallel for
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) = zero<OutputValueType>();
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) +=
-                    val * static_cast<OutputValueType>(b->at(col, j));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(temp_val);
         }
     }
 }
@@ -103,24 +103,26 @@ void advanced_spmv(std::shared_ptr<const OmpExecutor> exec,
                    const matrix::Dense<OutputValueType>* beta,
                    matrix::Dense<OutputValueType>* c)
 {
+    using arithmetic_type =
+        highest_precision<InputValueType, OutputValueType, MatrixValueType>;
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
-    const auto valpha = static_cast<OutputValueType>(alpha->at(0, 0));
-    const auto vbeta = beta->at(0, 0);
-    const auto val = static_cast<OutputValueType>(a->get_const_value()[0]);
+    const auto valpha = static_cast<arithmetic_type>(alpha->at(0, 0));
+    const auto vbeta = static_cast<arithmetic_type>(beta->at(0, 0));
+    const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
 #pragma omp parallel for
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) *= vbeta;
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) +=
-                    valpha * val * static_cast<OutputValueType>(b->at(col, j));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(
+                vbeta * static_cast<arithmetic_type>(c->at(row, j)) +
+                valpha * temp_val);
         }
     }
 }
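Note the loop restructuring that accompanies the precision change: the old OMP code zeroed a row of `c`, then for every stored entry swept all output columns, updating `c` in `OutputValueType`; the new code fixes an output column `j`, accumulates the row's dot product in a single `arithmetic_type` scalar, and writes it back once. A self-contained sketch of that pattern under simplifying assumptions (one right-hand side, `std::vector` storage, hypothetical names):

#include <cstddef>
#include <vector>

// Hypothetical stand-alone variant of the adopted pattern. SparsityCsr
// stores one shared value `val` for all entries; the per-row sum stays in
// ArithmeticType and is narrowed to OutputValueType exactly once.
template <typename InputValueType, typename OutputValueType,
          typename ArithmeticType>
void sparsity_spmv_sketch(const std::vector<int>& row_ptrs,
                          const std::vector<int>& col_idxs,
                          ArithmeticType val,
                          const std::vector<InputValueType>& b,
                          std::vector<OutputValueType>& c)
{
    const auto num_rows = static_cast<std::ptrdiff_t>(c.size());
#pragma omp parallel for
    for (std::ptrdiff_t row = 0; row < num_rows; ++row) {
        auto temp_val = ArithmeticType{};  // plays the role of gko::zero<>()
        for (auto k = row_ptrs[row]; k < row_ptrs[row + 1]; ++k) {
            temp_val += val * static_cast<ArithmeticType>(b[col_idxs[k]]);
        }
        c[row] = static_cast<OutputValueType>(temp_val);  // one narrowing cast
    }
}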
32 changes: 15 additions & 17 deletions reference/matrix/sparsity_csr_kernels.cpp
@@ -75,15 +75,13 @@ void spmv(std::shared_ptr<const ReferenceExecutor> exec,
 
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) = zero<OutputValueType>();
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) += static_cast<OutputValueType>(
-                    val * static_cast<arithmetic_type>(b->at(col, j)));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(temp_val);
         }
     }
 }
@@ -107,20 +105,20 @@ void advanced_spmv(std::shared_ptr<const ReferenceExecutor> exec,
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
     const auto valpha = static_cast<arithmetic_type>(alpha->at(0, 0));
-    const auto vbeta = static_cast<OutputValueType>(beta->at(0, 0));
+    const auto vbeta = static_cast<arithmetic_type>(beta->at(0, 0));
     const auto val = static_cast<arithmetic_type>(a->get_const_value()[0]);
 
     for (size_type row = 0; row < a->get_size()[0]; ++row) {
         for (size_type j = 0; j < c->get_size()[1]; ++j) {
-            c->at(row, j) *= vbeta;
-        }
-        for (size_type k = row_ptrs[row];
-             k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
-            auto col = col_idxs[k];
-            for (size_type j = 0; j < c->get_size()[1]; ++j) {
-                c->at(row, j) += static_cast<OutputValueType>(
-                    valpha * val * static_cast<arithmetic_type>(b->at(col, j)));
+            auto temp_val = gko::zero<arithmetic_type>();
+            for (size_type k = row_ptrs[row];
+                 k < static_cast<size_type>(row_ptrs[row + 1]); ++k) {
+                temp_val +=
+                    val * static_cast<arithmetic_type>(b->at(col_idxs[k], j));
             }
+            c->at(row, j) = static_cast<OutputValueType>(
+                vbeta * static_cast<arithmetic_type>(c->at(row, j)) +
+                valpha * temp_val);
         }
     }
 }
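The reference `advanced_spmv` additionally keeps `vbeta` in `arithmetic_type` (it was previously narrowed to `OutputValueType`) and folds the `beta * c` term into the high-precision expression, so the full update `c = beta * c + alpha * (A * b)` incurs a single rounding to `OutputValueType` instead of scaling `c` in output precision up front. A minimal sketch isolating that final update (hypothetical helper, not a Ginkgo function):

// Everything on the right-hand side is evaluated in ArithmeticType; the
// only narrowing cast is the outermost one.
template <typename OutputValueType, typename ArithmeticType>
OutputValueType advanced_update_sketch(ArithmeticType valpha,
                                       ArithmeticType vbeta,
                                       ArithmeticType temp_val,
                                       OutputValueType c_old)
{
    return static_cast<OutputValueType>(
        vbeta * static_cast<ArithmeticType>(c_old) + valpha * temp_val);
}

// Usage, e.g.: float c_new = advanced_update_sketch(2.0, 0.5, dot, c_old);
// with ArithmeticType = double deduced from the scalar arguments.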
2 changes: 1 addition & 1 deletion test/matrix/matrix.cpp
@@ -361,7 +361,7 @@ struct SparsityCsr
         using entry_type =
             gko::matrix_data<matrix_value_type, int>::nonzero_type;
         for (auto& entry : data.nonzeros) {
-            entry = entry_type{entry.row, entry.column, matrix_value_type{1}};
+            entry.value = gko::one<matrix_value_type>();
         }
     }
 };
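For context on the one-line test change: `SparsityCsr` carries a single shared value for all stored entries, so the fixture normalizes every generated nonzero to one. Assigning the `value` member in place is equivalent to rebuilding the whole entry but no longer restates `row` and `column`. A toy illustration with a hypothetical local struct standing in for `gko::matrix_data<...>::nonzero_type`:

#include <cassert>

// Hypothetical stand-in for the entry type used by the test.
struct entry_sketch {
    int row;
    int column;
    double value;
};

int main()
{
    entry_sketch entry{3, 7, 2.5};
    entry = entry_sketch{entry.row, entry.column, 1.0};  // old: rebuild all
    entry.value = 1.0;                                   // new: value only
    assert(entry.row == 3 && entry.column == 7 && entry.value == 1.0);
    return 0;
}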
