diff --git a/common/cuda_hip/matrix/dense_kernels.hpp.inc b/common/cuda_hip/matrix/dense_kernels.hpp.inc index f23b59237cc..2d029191721 100644 --- a/common/cuda_hip/matrix/dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/dense_kernels.hpp.inc @@ -423,16 +423,16 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( template __global__ __launch_bounds__(default_block_size) void row_scatter( size_type num_sets, IndexType* __restrict__ row_set_begins, - IndexType* __restrict__ row_set_offsets, size_type orig_num_rows, - size_type num_cols, size_type orig_stride, + IndexType* __restrict__ row_set_offsets, size_type target_num_rows, + size_type num_cols, size_type orig_num_rows, size_type orig_stride, const ValueType* __restrict__ orig_values, size_type target_stride, - OutputType* __restrict__ target_values) + OutputType* __restrict__ target_values, bool* __restrict__ invalid_access) { auto id = thread::get_thread_id_flat(); auto row = id / num_cols; auto col = id % num_cols; - if (row >= orig_num_rows) { + if (row >= orig_num_rows || *invalid_access) { return; } @@ -443,6 +443,11 @@ __global__ __launch_bounds__(default_block_size) void row_scatter( auto set_local_row = row - row_set_offsets[set_id]; auto target_row = set_local_row + row_set_begins[set_id]; + if (target_row >= target_num_rows) { + *invalid_access = true; + return; + } + target_values[target_row * target_stride + col] = orig_values[row * orig_stride + col]; } @@ -681,19 +686,28 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { - auto size = orig->get_size(); - if (size) { + auto orig_size = orig->get_size(); + auto target_size = target->get_size(); + + array invalid_access_arr(exec, {false}); + + if (orig_size) { constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); + auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size); kernel::row_scatter<<get_stream()>>>( row_idxs->get_num_subsets(), as_device_type(row_idxs->get_subsets_begin()), - as_device_type(row_idxs->get_superset_indices()), size[0], size[1], - orig->get_stride(), as_device_type(orig->get_const_values()), - target->get_stride(), as_device_type(target->get_values())); + as_device_type(row_idxs->get_superset_indices()), target_size[0], + target_size[1], orig_size[0], orig->get_stride(), + as_device_type(orig->get_const_values()), target->get_stride(), + as_device_type(target->get_values()), + as_device_type(invalid_access_arr.get_data())); } + + invalid_access = + exec->copy_val_to_host(invalid_access_arr.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index 3c0777e9638..498026bc677 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.cpp @@ -498,16 +498,23 @@ template void row_scatter(std::shared_ptr exec, const array* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { + array invalid_access_arr{exec, {false}}; run_kernel( exec, - [] GKO_KERNEL(auto row, auto col, auto orig, auto rows, - auto scattered) { + [num_rows = target->get_size()[0]] GKO_KERNEL( + auto row, auto col, auto orig, auto rows, auto scattered, + auto* invalid_access_ptr) { + if (rows[row] >= num_rows) { + *invalid_access_ptr = true; + return; + } scattered(rows[row], col) = orig(row, col); }, dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs, - target); + target, invalid_access_arr.get_data()); + invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 240f07f2a4e..f617cb66483 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1159,38 +1159,26 @@ void Dense::row_gather_impl(const Dense* alpha, } -template -template -void Dense::row_scatter_impl(const array* row_idxs, - Dense* target) const -{ - auto exec = this->get_executor(); - dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); - GKO_ASSERT_EQUAL_COLS(this, target); - // @todo check that indices are inbounds for target - - exec->run(dense::make_row_scatter( - make_temporary_clone(exec, row_idxs).get(), this, - make_temporary_clone(exec, target).get())); -} - - -template -template -void Dense::row_scatter_impl(const index_set* row_idxs, - Dense* target) const +template +void row_scatter_impl(const IndexContainer* row_idxs, + const Dense* orig, Dense* target) { - auto exec = this->get_executor(); + auto exec = orig->get_executor(); dim<2> expected_dim{static_cast(row_idxs->get_num_elems()), - this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); - GKO_ASSERT_EQUAL_COLS(this, target); - // @todo check that indices are inbounds for target + orig->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig); + GKO_ASSERT_EQUAL_COLS(orig, target); + + bool invalid_access; exec->run(dense::make_row_scatter( - make_temporary_clone(exec, row_idxs).get(), this, - make_temporary_clone(exec, target).get())); + make_temporary_clone(exec, row_idxs).get(), orig, + make_temporary_clone(exec, target).get(), invalid_access)); + + if (invalid_access) { + GKO_INVALID_STATE( + "Out-of-bounds access detected during kernel execution."); + } } @@ -1452,7 +1440,7 @@ void Dense::row_scatter(const array* row_idxs, ptr_param row_collection) const { gather_mixed_real_complex( - [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, row_collection.get()); } @@ -1463,7 +1451,7 @@ void Dense::row_scatter(const index_set* row_idxs, ptr_param row_collection) const { gather_mixed_real_complex( - [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, row_collection.get()); } diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 23252e57f82..09a8c223858 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -264,13 +264,13 @@ namespace kernels { void row_scatter(std::shared_ptr exec, \ const array<_itype>* gather_indices, \ const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target) + matrix::Dense<_otype>* target, bool& invalid_access) #define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \ void row_scatter(std::shared_ptr exec, \ const index_set<_itype>* gather_indices, \ const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target) + matrix::Dense<_otype>* target, bool& invalid_access) #define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ void column_permute(std::shared_ptr exec, \ diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 2ed57fbbb80..1c8069c662c 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -199,6 +199,51 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, dcfg_sq_list); +template +void row_scatter_impl(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target, bool* invalid_access) +{ + const auto num_sets = row_idxs->get_num_subsets(); + const auto num_rows = row_idxs->get_num_elems(); + const auto num_cols = orig->get_size()[1]; + + const auto* row_set_begins = row_idxs->get_subsets_begin(); + const auto* row_set_offsets = row_idxs->get_superset_indices(); + + const auto orig_stride = orig->get_stride(); + const auto* orig_values = orig->get_const_values(); + + const auto target_stride = target->get_stride(); + auto* target_values = target->get_values(); + + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + static_cast(num_rows * num_cols), + [=](sycl::item<1> item) { + const auto row = static_cast(item[0]) / num_cols; + const auto col = static_cast(item[0]) % num_cols; + + if (row >= num_rows) { + return; + } + + auto set_id = + binary_search( + 0, num_sets + 1, + [=](auto i) { return row < row_set_offsets[i]; }) - + 1; + auto set_local_row = row - row_set_offsets[set_id]; + auto target_row = set_local_row + row_set_begins[set_id]; + + target_values[target_row * target_stride + col] = + orig_values[row * orig_stride + col]; + }); + }); +} + + } // namespace kernel @@ -607,44 +652,14 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { - const auto num_sets = row_idxs->get_num_subsets(); - const auto num_rows = row_idxs->get_num_elems(); - const auto num_cols = orig->get_size()[1]; + array invalid_access_arr; - const auto* row_set_begins = row_idxs->get_subsets_begin(); - const auto* row_set_offsets = row_idxs->get_superset_indices(); + kernel::row_scatter_impl(exec, row_idxs, orig, target, + invalid_access_arr.get_data()); - const auto orig_stride = orig->get_stride(); - const auto* orig_values = orig->get_const_values(); - - const auto target_stride = target->get_stride(); - auto* target_values = target->get_values(); - - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - static_cast(num_rows * num_cols), - [=](sycl::item<1> item) { - const auto row = static_cast(item[0]) / num_cols; - const auto col = static_cast(item[0]) % num_cols; - - if (row >= num_rows) { - return; - } - - auto set_id = - binary_search( - 0, num_sets + 1, - [=](auto i) { return row < row_set_offsets[i]; }) - - 1; - auto set_local_row = row - row_set_offsets[set_id]; - auto target_row = set_local_row + row_set_begins[set_id]; - - target_values[target_row * target_stride + col] = - orig_values[row * orig_stride + col]; - }); - }); + invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 22d24312b61..96063f684a5 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -1316,14 +1316,6 @@ class Dense const Dense* beta, Dense* row_collection) const; - template - void row_scatter_impl(const array* row_idxs, - Dense* target) const; - - template - void row_scatter_impl(const index_set* row_idxs, - Dense* target) const; - template void column_permute_impl(const array* permutation, Dense* output) const; diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 1e9bbacc9ed..c137884949d 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -502,16 +502,26 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto set_begins = row_idxs->get_subsets_begin(); auto set_ends = row_idxs->get_subsets_end(); auto set_offsets = row_idxs->get_superset_indices(); -#pragma omp parallel for + invalid_access = false; +#pragma omp parallel for shared(invalid_access) for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { + if (invalid_access) { + continue; + } for (int target_row = set_begins[set]; target_row < set_ends[set]; ++target_row) { + if (invalid_access || target_row >= target->get_size()[0]) { + invalid_access = true; + break; + } + auto orig_row = target_row - set_begins[set] + set_offsets[set]; + for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(target_row, j) = orig->at(orig_row, j); } diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 893df28b165..dada6016239 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -925,10 +925,14 @@ template void row_scatter(std::shared_ptr exec, const array* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto rows = row_idxs->get_const_data(); for (size_type i = 0; i < row_idxs->get_num_elems(); ++i) { + if (rows[i] >= target->get_size()[0]) { + invalid_access = true; + return; + } for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(rows[i], j) = orig->at(i, j); } @@ -943,14 +947,19 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto set_begins = row_idxs->get_subsets_begin(); auto set_ends = row_idxs->get_subsets_end(); auto set_offsets = row_idxs->get_superset_indices(); + invalid_access = false; for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { for (int target_row = set_begins[set]; target_row < set_ends[set]; ++target_row) { + if (target_row >= target->get_size()[0]) { + invalid_access = true; + return; + } auto orig_row = target_row - set_begins[set] + set_offsets[set]; for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(target_row, j) = orig->at(orig_row, j); diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index a4839bc1bb2..59dda5d8407 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -2414,6 +2414,20 @@ TYPED_TEST(Dense, MatrixScatterRowsFailsWithWrongDimensions) } +TYPED_TEST(Dense, MatrixScatterRowsFailsWithInvalidState) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + auto row_collection = + gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); + gko::array permute_idxs{exec, {200, 0}}; + + ASSERT_THROW(row_collection->row_scatter(&permute_idxs, this->mtx5), + gko::InvalidStateError); +} + + TYPED_TEST(Dense, MatrixCanScatterRowsUsingIndexSetIntoDense) { using Mtx = typename TestFixture::Mtx; @@ -2441,6 +2455,26 @@ TYPED_TEST(Dense, MatrixCanScatterRowsUsingIndexSetIntoDense) } +TYPED_TEST(Dense, MatrixScatterRowsUsingIndexSetFailsWithInvalidState) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + auto mtx = gko::initialize({{2.2, 6.9, 7.8}, + {4.7, 1.3, 7.6}, + {9.2, 8.6, 4.5}, + {8.1, 9.4, 6.8}, + {9.6, 7.1, 2.5}}, + exec); + auto row_collection = gko::initialize( + {{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}, {3.4, 3.8, 7.8}}, exec); + gko::index_set permute_idxs{exec, {1, 0, 44}}; + + ASSERT_THROW(row_collection->row_scatter(&permute_idxs, mtx), + gko::InvalidStateError); +} + + TYPED_TEST(Dense, MatrixGatherScatterIsIdentity) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 4462d1e2e34..d52732e3231 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1305,6 +1305,16 @@ TEST_F(Dense, CanScatterRowsIntoDense) } +TEST_F(Dense, CanScatterRowsIntoDenseFailsWithInvalidState) +{ + set_up_apply_data(); + gko::array out_of_bounds(ref, du->get_size()[0]); + out_of_bounds.get_data()[0] = dx->get_size()[0] * 40; + + ASSERT_THROW(du->row_scatter(&out_of_bounds, dx), gko::InvalidStateError); +} + + TEST_F(Dense, CanScatterRowsIntoDenseSubmatrix) { set_up_apply_data(); @@ -1344,6 +1354,22 @@ TEST_F(Dense, CanScatterRowsIntoDenseCrossExecutor) } +#ifdef NDEBUG +// this test can only be run if C asserts are disabled. Otherwise, +// an assert in the constructor of index_set may fail. +TEST_F(Dense, CanScatterRowsIntoDenseUsingIndexSetFailsWithInvalidState) +{ + set_up_apply_data(); + gko::array out_of_bounds(ref, du->get_size()[0]); + out_of_bounds.get_data()[0] = dx->get_size()[0] * 40; + auto rindices = std::make_unique>( + ref, x->get_size()[0], out_of_bounds); + + ASSERT_THROW(du->row_scatter(&out_of_bounds, dx), gko::InvalidStateError); +} +#endif + + TEST_F(Dense, GatherScatterIsIdentity) { set_up_apply_data();