Skip to content

Commit

Permalink
review comments
Browse files Browse the repository at this point in the history
Co-authored-by: Yu-hsiang Tsai <yu-hsiang.tsai@kit.edu>
  • Loading branch information
Slaedr and Yu-hsiang Tsai committed Oct 5, 2021
1 parent a5b3f65 commit 754be29
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 28 deletions.
7 changes: 4 additions & 3 deletions common/cuda_hip/matrix/fbcsr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/


namespace kernel {


template <int mat_blk_sz, int subwarp_size, typename ValueType,
typename IndexType>
__global__ __launch_bounds__(default_block_size) void transpose_blocks(
Expand All @@ -50,10 +50,10 @@ __global__ __launch_bounds__(default_block_size) void transpose_blocks(
const unsigned sw_id_in_threadblock{threadIdx.x / subwarp_size};

constexpr int mat_blk_sz_2{mat_blk_sz * mat_blk_sz};
__shared__ UninitializedArray<ValueType, warps_in_block * mat_blk_sz_2>
origblocks;

for (auto ibz = begin_blk; ibz < nbnz; ibz += total_subwarp_count) {
__shared__ UninitializedArray<ValueType, warps_in_block * mat_blk_sz_2>
origblocks;
for (int i = sw_threadidx; i < mat_blk_sz_2; i += subwarp_size) {
origblocks[sw_id_in_threadblock * mat_blk_sz_2 + i] =
values[ibz * mat_blk_sz_2 + i];
Expand All @@ -70,4 +70,5 @@ __global__ __launch_bounds__(default_block_size) void transpose_blocks(
}
}


} // namespace kernel
27 changes: 14 additions & 13 deletions core/test/utils/fb_matrix_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,23 @@ namespace test {
template <typename MatrixType = matrix::Dense<>, typename NonzeroDistribution,
typename ValueDistribution, typename Engine, typename... MatrixArgs>
std::unique_ptr<MatrixType> generate_random_matrix_with_diag(
size_type num_rows, size_type num_cols, NonzeroDistribution&& nonzero_dist,
ValueDistribution&& value_dist, Engine&& engine,
std::shared_ptr<const Executor> exec, MatrixArgs&&... args)
typename MatrixType::index_type num_rows,
typename MatrixType::index_type num_cols,
NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist,
Engine&& engine, std::shared_ptr<const Executor> exec, MatrixArgs&&... args)
{
using value_type = typename MatrixType::value_type;
using index_type = typename MatrixType::index_type;

matrix_data<value_type, index_type> data{gko::dim<2>{num_rows, num_cols},
matrix_data<value_type, index_type> data{gko::dim<2>(num_rows, num_cols),
{}};

for (size_type row = 0; row < num_rows; ++row) {
std::vector<size_type> col_idx(num_cols);
for (index_type row = 0; row < num_rows; ++row) {
std::vector<index_type> col_idx(num_cols);
std::iota(col_idx.begin(), col_idx.end(), size_type(0));
// randomly generate number of nonzeros in this row
auto nnz_in_row = static_cast<size_type>(nonzero_dist(engine));
nnz_in_row = std::max(size_type(1), std::min(nnz_in_row, num_cols));
auto nnz_in_row = static_cast<index_type>(nonzero_dist(engine));
nnz_in_row = std::max(1, std::min(nnz_in_row, num_cols));
// select a subset of `nnz_in_row` column indexes, and fill these
// locations with random values
std::shuffle(col_idx.begin(), col_idx.end(), engine);
Expand All @@ -103,7 +104,7 @@ std::unique_ptr<MatrixType> generate_random_matrix_with_diag(
col_idx[nnz_in_row - 1] = row;
}
std::for_each(
begin(col_idx), begin(col_idx) + nnz_in_row, [&](size_type col) {
begin(col_idx), begin(col_idx) + nnz_in_row, [&](index_type col) {
data.nonzeros.emplace_back(
row, col,
detail::get_rand_value<value_type>(value_dist, engine));
Expand Down Expand Up @@ -211,7 +212,6 @@ std::unique_ptr<matrix::Fbcsr<ValueType, IndexType>> generate_fbcsr_from_csr(
* entry in each block-row.
*
* @param ref Reference executor.
* @param engine Random number engine to use, such as std::ranlux48.
* @param nbrows The number of block-rows in the generated matrix.
* @param nbcols The number of block-columns in the generated matrix.
* @param mat_blk_sz Block size of the generated matrix.
Expand All @@ -220,12 +220,13 @@ std::unique_ptr<matrix::Fbcsr<ValueType, IndexType>> generate_fbcsr_from_csr(
* @param unsort If true, the blocks of the generated matrix within each
* block-row are ordered randomly. Otherwise, blocks in each row
* are ordered by block-column index.
* @param engine Random number engine to use, such as std::ranlux48.
*/
template <typename ValueType, typename IndexType, typename RandEngine>
std::unique_ptr<matrix::Fbcsr<ValueType, IndexType>> generate_random_fbcsr(
std::shared_ptr<const ReferenceExecutor> ref, RandEngine engine,
const IndexType nbrows, const IndexType nbcols, const int mat_blk_sz,
const bool diag_dominant, const bool unsort)
std::shared_ptr<const ReferenceExecutor> ref, const IndexType nbrows,
const IndexType nbcols, const int mat_blk_sz, const bool diag_dominant,
const bool unsort, RandEngine&& engine)
{
using real_type = gko::remove_complex<ValueType>;
std::unique_ptr<matrix::Csr<ValueType, IndexType>> rand_csr_ref =
Expand Down
3 changes: 2 additions & 1 deletion core/test/utils/fb_matrix_generator_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class BlockMatrixGenerator : public ::testing::Test {
rbmtx_dd(gko::test::generate_fbcsr_from_csr(exec, mtx.get(), blk_sz,
true, std::ranlux48(42))),
cbmtx(gko::test::generate_random_fbcsr<value_type>(
exec, std::ranlux48(42), nbrows, nbcols, blk_sz, true, false))
exec, nbrows, nbcols, blk_sz, true, false, std::ranlux48(42)))
{}

const int nbrows = 100;
Expand Down Expand Up @@ -107,6 +107,7 @@ TEST_F(BlockMatrixGenerator, OutputHasCorrectSize)
ASSERT_EQ(cbmtx->get_block_size(), blk_sz);
}


TEST_F(BlockMatrixGenerator, OutputHasCorrectSparsityPattern)
{
ASSERT_EQ(mtx->get_num_stored_elements(),
Expand Down
5 changes: 4 additions & 1 deletion cuda/matrix/fbcsr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(

namespace {


template <int mat_blk_sz, typename ValueType, typename IndexType>
void transpose_blocks_impl(syn::value_list<int, mat_blk_sz>,
matrix::Fbcsr<ValueType, IndexType>* const mat)
{
constexpr int subwarp_size = config::warp_size;
const size_type nbnz = mat->get_num_stored_blocks();
const size_type numthreads = nbnz * subwarp_size;
const size_type numblocks = (numthreads - 1) / default_block_size + 1;
// const size_type numblocks = (numthreads - 1) / default_block_size + 1;
const size_type numblocks = ceildiv(numthreads, default_block_size);
const dim3 block_size{static_cast<unsigned>(default_block_size), 1, 1};
const dim3 grid_dim{static_cast<unsigned>(numblocks), 1, 1};
kernel::transpose_blocks<mat_blk_sz, subwarp_size>
Expand All @@ -218,6 +220,7 @@ void transpose_blocks_impl(syn::value_list<int, mat_blk_sz>,
GKO_ENABLE_IMPLEMENTATION_SELECTION(select_transpose_blocks,
transpose_blocks_impl);


} // namespace


Expand Down
11 changes: 6 additions & 5 deletions cuda/test/matrix/fbcsr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class Fbcsr : public ::testing::Test {
const index_type rand_bcols = 70;
const int block_size = 3;
rsorted_ref = gko::test::generate_random_fbcsr<value_type, index_type>(
ref, std::ranlux48(43), rand_brows, rand_bcols, block_size, false,
false);
ref, rand_brows, rand_bcols, block_size, false, false,
std::ranlux48(43));
}

void TearDown()
Expand Down Expand Up @@ -148,13 +148,13 @@ TYPED_TEST(Fbcsr, TransposeIsEquivalentToRefSortedBS7)
const int block_size = 7;
auto rsorted_ref2 =
gko::test::generate_random_fbcsr<value_type, index_type>(
this->ref, std::ranlux48(43), rand_brows, rand_bcols, block_size,
false, false);
this->ref, rand_brows, rand_bcols, block_size, false, false,
std::ranlux48(43));
rand_cuda->copy_from(gko::lend(rsorted_ref2));

auto trans_ref_linop = rsorted_ref2->transpose();
std::unique_ptr<const Mtx> trans_ref =
gko::as<const Mtx>(std::move(trans_ref_linop));

auto trans_cuda_linop = rand_cuda->transpose();
std::unique_ptr<const Mtx> trans_cuda =
gko::as<const Mtx>(std::move(trans_cuda_linop));
Expand Down Expand Up @@ -310,4 +310,5 @@ TYPED_TEST(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef)
GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 5 * tol);
}


} // namespace
10 changes: 5 additions & 5 deletions omp/test/matrix/fbcsr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,11 @@ class Fbcsr : public ::testing::Test {
void set_up_apply_data(int num_vectors = 1)
{
mtx = gko::test::generate_random_fbcsr<real_type>(
ref, rand_engine, num_brows, num_bcols, blk_sz, false, false);
ref, num_brows, num_bcols, blk_sz, false, false, rand_engine);
complex_mtx = gko::test::generate_random_fbcsr<std::complex<real_type>>(
ref, rand_engine, num_brows, num_bcols, blk_sz, false, false);
ref, num_brows, num_bcols, blk_sz, false, false, rand_engine);
square_mtx = gko::test::generate_random_fbcsr<real_type>(
ref, rand_engine, num_brows, num_brows, blk_sz, false, false);
ref, num_brows, num_brows, blk_sz, false, false, rand_engine);
dmtx = Mtx::create(omp);
dmtx->copy_from(mtx.get());
complex_dmtx = ComplexMtx::create(omp);
Expand Down Expand Up @@ -131,7 +131,7 @@ class Fbcsr : public ::testing::Test {
{
constexpr int min_nnz_per_row{2};
auto local_mtx_ref = gko::test::generate_random_fbcsr<real_type>(
ref, rand_engine, num_brows, num_bcols, blk_sz, false, true);
ref, num_brows, num_bcols, blk_sz, false, true, rand_engine);

auto local_mtx_omp = Mtx::create(omp);
local_mtx_omp->copy_from(local_mtx_ref.get());
Expand Down Expand Up @@ -404,7 +404,7 @@ TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef)
TEST_F(Fbcsr, MaxNnzPerRowIsEquivalentToRefSortedBS3)
{
auto mtx_ref = gko::test::generate_random_fbcsr<real_type>(
ref, rand_engine, num_brows, num_bcols, blk_sz, false, false);
ref, num_brows, num_bcols, blk_sz, false, false, rand_engine);
auto rand_omp = Mtx::create(omp);
rand_omp->copy_from(gko::lend(mtx_ref));
gko::size_type ref_max_nnz{}, omp_max_nnz{};
Expand Down
3 changes: 3 additions & 0 deletions reference/matrix/fbcsr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(

namespace {


template <int mat_blk_sz, typename ValueType, typename IndexType>
void sort_by_column_index_impl(
syn::value_list<int, mat_blk_sz>,
Expand Down Expand Up @@ -447,8 +448,10 @@ void sort_by_column_index_impl(
GKO_ENABLE_IMPLEMENTATION_SELECTION(select_sort_col_idx,
sort_by_column_index_impl);


} // namespace


template <typename ValueType, typename IndexType>
void sort_by_column_index(const std::shared_ptr<const ReferenceExecutor> exec,
matrix::Fbcsr<ValueType, IndexType>* const to_sort)
Expand Down

0 comments on commit 754be29

Please sign in to comment.