Skip to content

Commit

Permalink
adds in-bounds check to kernels
Browse files Browse the repository at this point in the history
Co-authored-by: Yu-Hsiang M. Tsai <yhmtsai@gmail.com>
  • Loading branch information
MarcelKoch and yhmtsai committed Aug 7, 2023
1 parent 4908ff5 commit bb553d5
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 94 deletions.
36 changes: 25 additions & 11 deletions common/cuda_hip/matrix/dense_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -423,16 +423,16 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp(
template <typename ValueType, typename OutputType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void row_scatter(
size_type num_sets, IndexType* __restrict__ row_set_begins,
IndexType* __restrict__ row_set_offsets, size_type orig_num_rows,
size_type num_cols, size_type orig_stride,
IndexType* __restrict__ row_set_offsets, size_type target_num_rows,
size_type num_cols, size_type orig_num_rows, size_type orig_stride,
const ValueType* __restrict__ orig_values, size_type target_stride,
OutputType* __restrict__ target_values)
OutputType* __restrict__ target_values, bool* __restrict__ invalid_access)
{
auto id = thread::get_thread_id_flat();
auto row = id / num_cols;
auto col = id % num_cols;

if (row >= orig_num_rows) {
if (row >= orig_num_rows || *invalid_access) {
return;
}

Expand All @@ -443,6 +443,11 @@ __global__ __launch_bounds__(default_block_size) void row_scatter(
auto set_local_row = row - row_set_offsets[set_id];
auto target_row = set_local_row + row_set_begins[set_id];

if (target_row >= target_num_rows) {
*invalid_access = true;
return;
}

target_values[target_row * target_stride + col] =
orig_values[row * orig_stride + col];
}
Expand Down Expand Up @@ -681,19 +686,28 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
const index_set<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
auto size = orig->get_size();
if (size) {
auto orig_size = orig->get_size();
auto target_size = target->get_size();

array<bool> invalid_access_arr(exec, {false});

if (orig_size) {
constexpr auto block_size = default_block_size;
auto num_blocks = ceildiv(size[0] * size[1], block_size);
auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size);
kernel::row_scatter<<<num_blocks, block_size, 0, exec->get_stream()>>>(
row_idxs->get_num_subsets(),
as_device_type(row_idxs->get_subsets_begin()),
as_device_type(row_idxs->get_superset_indices()), size[0], size[1],
orig->get_stride(), as_device_type(orig->get_const_values()),
target->get_stride(), as_device_type(target->get_values()));
as_device_type(row_idxs->get_superset_indices()), target_size[0],
target_size[1], orig_size[0], orig->get_stride(),
as_device_type(orig->get_const_values()), target->get_stride(),
as_device_type(target->get_values()),
as_device_type(invalid_access_arr.get_data()));
}

invalid_access =
exec->copy_val_to_host(invalid_access_arr.get_const_data());
}

GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(
Expand Down
15 changes: 11 additions & 4 deletions common/unified/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,16 +498,23 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
const array<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
array<bool> invalid_access_arr{exec, {false}};
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto orig, auto rows,
auto scattered) {
[num_rows = target->get_size()[0]] GKO_KERNEL(
auto row, auto col, auto orig, auto rows, auto scattered,
auto* invalid_access_ptr) {
if (rows[row] >= num_rows) {
*invalid_access_ptr = true;
return;
}
scattered(rows[row], col) = orig(row, col);
},
dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs,
target);
target, invalid_access_arr.get_data());
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
}

GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(
Expand Down
48 changes: 18 additions & 30 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,38 +1159,26 @@ void Dense<ValueType>::row_gather_impl(const Dense<ValueType>* alpha,
}


template <typename ValueType>
template <typename OutputType, typename IndexType>
void Dense<ValueType>::row_scatter_impl(const array<IndexType>* row_idxs,
Dense<OutputType>* target) const
{
auto exec = this->get_executor();
dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]};
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
GKO_ASSERT_EQUAL_COLS(this, target);
// @todo check that indices are inbounds for target

exec->run(dense::make_row_scatter(
make_temporary_clone(exec, row_idxs).get(), this,
make_temporary_clone(exec, target).get()));
}


template <typename ValueType>
template <typename OutputType, typename IndexType>
void Dense<ValueType>::row_scatter_impl(const index_set<IndexType>* row_idxs,
Dense<OutputType>* target) const
template <typename ValueType, typename OutputType, typename IndexContainer>
void row_scatter_impl(const IndexContainer* row_idxs,
const Dense<ValueType>* orig, Dense<OutputType>* target)
{
auto exec = this->get_executor();
auto exec = orig->get_executor();
dim<2> expected_dim{static_cast<size_type>(row_idxs->get_num_elems()),
this->get_size()[1]};
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
GKO_ASSERT_EQUAL_COLS(this, target);
// @todo check that indices are inbounds for target
orig->get_size()[1]};
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig);
GKO_ASSERT_EQUAL_COLS(orig, target);

bool invalid_access;

exec->run(dense::make_row_scatter(
make_temporary_clone(exec, row_idxs).get(), this,
make_temporary_clone(exec, target).get()));
make_temporary_clone(exec, row_idxs).get(), orig,
make_temporary_clone(exec, target).get(), invalid_access));

if (invalid_access) {
GKO_INVALID_STATE(
"Out-of-bounds access detected during kernel execution.");
}
}


Expand Down Expand Up @@ -1452,7 +1440,7 @@ void Dense<ValueType>::row_scatter(const array<IndexType>* row_idxs,
ptr_param<LinOp> row_collection) const
{
gather_mixed_real_complex<ValueType>(
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
row_collection.get());
}

Expand All @@ -1463,7 +1451,7 @@ void Dense<ValueType>::row_scatter(const index_set<IndexType>* row_idxs,
ptr_param<LinOp> row_collection) const
{
gather_mixed_real_complex<ValueType>(
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
row_collection.get());
}

Expand Down
4 changes: 2 additions & 2 deletions core/matrix/dense_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,13 +264,13 @@ namespace kernels {
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
const array<_itype>* gather_indices, \
const matrix::Dense<_vtype>* orig, \
matrix::Dense<_otype>* target)
matrix::Dense<_otype>* target, bool& invalid_access)

#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
const index_set<_itype>* gather_indices, \
const matrix::Dense<_vtype>* orig, \
matrix::Dense<_otype>* target)
matrix::Dense<_otype>* target, bool& invalid_access)

#define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \
void column_permute(std::shared_ptr<const DefaultExecutor> exec, \
Expand Down
85 changes: 50 additions & 35 deletions dpcpp/matrix/dense_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,51 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose,
dcfg_sq_list);


template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter_impl(std::shared_ptr<const DefaultExecutor> exec,
const index_set<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target, bool* invalid_access)
{
const auto num_sets = row_idxs->get_num_subsets();
const auto num_rows = row_idxs->get_num_elems();
const auto num_cols = orig->get_size()[1];

const auto* row_set_begins = row_idxs->get_subsets_begin();
const auto* row_set_offsets = row_idxs->get_superset_indices();

const auto orig_stride = orig->get_stride();
const auto* orig_values = orig->get_const_values();

const auto target_stride = target->get_stride();
auto* target_values = target->get_values();

exec->get_queue()->submit([&](sycl::handler& cgh) {
cgh.parallel_for(
static_cast<size_type>(num_rows * num_cols),
[=](sycl::item<1> item) {
const auto row = static_cast<size_type>(item[0]) / num_cols;
const auto col = static_cast<size_type>(item[0]) % num_cols;

if (row >= num_rows) {
return;
}

auto set_id =
binary_search<size_type>(
0, num_sets + 1,
[=](auto i) { return row < row_set_offsets[i]; }) -
1;
auto set_local_row = row - row_set_offsets[set_id];
auto target_row = set_local_row + row_set_begins[set_id];

target_values[target_row * target_stride + col] =
orig_values[row * orig_stride + col];
});
});
}


} // namespace kernel


Expand Down Expand Up @@ -607,44 +652,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
const index_set<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
const auto num_sets = row_idxs->get_num_subsets();
const auto num_rows = row_idxs->get_num_elems();
const auto num_cols = orig->get_size()[1];
array<bool> invalid_access_arr;

const auto* row_set_begins = row_idxs->get_subsets_begin();
const auto* row_set_offsets = row_idxs->get_superset_indices();
kernel::row_scatter_impl(exec, row_idxs, orig, target,
invalid_access_arr.get_data());

const auto orig_stride = orig->get_stride();
const auto* orig_values = orig->get_const_values();

const auto target_stride = target->get_stride();
auto* target_values = target->get_values();

exec->get_queue()->submit([&](sycl::handler& cgh) {
cgh.parallel_for(
static_cast<size_type>(num_rows * num_cols),
[=](sycl::item<1> item) {
const auto row = static_cast<size_type>(item[0]) / num_cols;
const auto col = static_cast<size_type>(item[0]) % num_cols;

if (row >= num_rows) {
return;
}

auto set_id =
binary_search<size_type>(
0, num_sets + 1,
[=](auto i) { return row < row_set_offsets[i]; }) -
1;
auto set_local_row = row - row_set_offsets[set_id];
auto target_row = set_local_row + row_set_begins[set_id];

target_values[target_row * target_stride + col] =
orig_values[row * orig_stride + col];
});
});
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
}

GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(
Expand Down
8 changes: 0 additions & 8 deletions include/ginkgo/core/matrix/dense.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1316,14 +1316,6 @@ class Dense
const Dense<ValueType>* beta,
Dense<OutputType>* row_collection) const;

template <typename OutputType, typename IndexType>
void row_scatter_impl(const array<IndexType>* row_idxs,
Dense<OutputType>* target) const;

template <typename OutputType, typename IndexType>
void row_scatter_impl(const index_set<IndexType>* row_idxs,
Dense<OutputType>* target) const;

template <typename IndexType>
void column_permute_impl(const array<IndexType>* permutation,
Dense* output) const;
Expand Down
14 changes: 12 additions & 2 deletions omp/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,16 +502,26 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
const index_set<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
auto set_begins = row_idxs->get_subsets_begin();
auto set_ends = row_idxs->get_subsets_end();
auto set_offsets = row_idxs->get_superset_indices();
#pragma omp parallel for
invalid_access = false;
#pragma omp parallel for shared(invalid_access)
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
if (invalid_access) {
continue;
}
for (int target_row = set_begins[set]; target_row < set_ends[set];
++target_row) {
if (invalid_access || target_row >= target->get_size()[0]) {
invalid_access = true;
break;
}

auto orig_row = target_row - set_begins[set] + set_offsets[set];

for (size_type j = 0; j < orig->get_size()[1]; ++j) {
target->at(target_row, j) = orig->at(orig_row, j);
}
Expand Down
13 changes: 11 additions & 2 deletions reference/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -925,10 +925,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
const array<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
auto rows = row_idxs->get_const_data();
for (size_type i = 0; i < row_idxs->get_num_elems(); ++i) {
if (rows[i] >= target->get_size()[0]) {
invalid_access = true;
return;
}
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
target->at(rows[i], j) = orig->at(i, j);
}
Expand All @@ -943,14 +947,19 @@ template <typename ValueType, typename OutputType, typename IndexType>
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
const index_set<IndexType>* row_idxs,
const matrix::Dense<ValueType>* orig,
matrix::Dense<OutputType>* target)
matrix::Dense<OutputType>* target, bool& invalid_access)
{
auto set_begins = row_idxs->get_subsets_begin();
auto set_ends = row_idxs->get_subsets_end();
auto set_offsets = row_idxs->get_superset_indices();
invalid_access = false;
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
for (int target_row = set_begins[set]; target_row < set_ends[set];
++target_row) {
if (target_row >= target->get_size()[0]) {
invalid_access = true;
return;
}
auto orig_row = target_row - set_begins[set] + set_offsets[set];
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
target->at(target_row, j) = orig->at(orig_row, j);
Expand Down
Loading

0 comments on commit bb553d5

Please sign in to comment.