Skip to content

Commit

Permalink
Merge extended sparsity pattern for ISAI
Browse files Browse the repository at this point in the history
* Adds the possibility to use higher powers of the triangular matrix as the input sparsity pattern for 
  the incomplete sparse approximate inverse (ISAI) generation.

* Moves all SpGEMM implementations to sorted output and adds is_sorted_by_column_idx 
  GPU kernels to test them.

* Fixes issues with gko::as<...> and cross-casting for objects with multiple base classes.

Related PR: #508
  • Loading branch information
upsj committed Apr 25, 2020
2 parents 120963c + 7d200b8 commit 90a0e5c
Show file tree
Hide file tree
Showing 25 changed files with 560 additions and 57 deletions.
30 changes: 30 additions & 0 deletions common/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,36 @@ __global__ __launch_bounds__(default_block_size) void fill_in_hybrid(
}


/**
 * @internal
 *
 * Checks whether the column indices of a CSR matrix are sorted within each
 * row. One thread per row compares all adjacent column-index pairs of its
 * row and clears `*flag` as soon as a descending pair is found.
 *
 * @note The caller must initialize `*flag` to `true` before the launch.
 *       Only the value `false` is ever written to `*flag`, so concurrent
 *       writers cannot produce conflicting results.
 */
template <typename IndexType>
__global__ __launch_bounds__(default_block_size) void check_unsorted(
    const IndexType *__restrict__ row_ptrs,
    const IndexType *__restrict__ col_idxs, IndexType num_rows, bool *flag)
{
    // block-local snapshot of the global flag, so threads can skip their scan
    // if unsortedness was already detected before this block started
    __shared__ bool sh_flag;
    auto block = group::this_thread_block();
    if (block.thread_rank() == 0) {
        sh_flag = *flag;
    }
    block.sync();

    auto row = thread::get_thread_id_flat<IndexType>();
    if (row >= num_rows) {
        return;
    }

    // fail early: skip the scan entirely if an unsorted row was already found
    if (sh_flag) {
        for (auto nz = row_ptrs[row]; nz < row_ptrs[row + 1] - 1; ++nz) {
            if (col_idxs[nz] > col_idxs[nz + 1]) {
                *flag = false;
                // best-effort hint for the remaining threads of this block;
                // the unsynchronized write is benign since only `false` is
                // ever stored and readers use it purely as an optimization
                sh_flag = false;
                return;
            }
        }
    }
}


} // namespace kernel


Expand Down
16 changes: 16 additions & 0 deletions common/preconditioner/isai_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -210,4 +210,20 @@ __launch_bounds__(subwarp_size *subwarps_per_block) void generate_u_inverse(
}


/**
 * @internal
 *
 * Overwrites the values of a triangular CSR matrix with the values of the
 * identity matrix, i.e. ones on the diagonal and zeros everywhere else,
 * while keeping the sparsity pattern intact. One thread handles one row.
 *
 * Assumes sorted column indices, so the diagonal entry is the last nonzero
 * of its row for a lower triangular matrix and the first one for an upper
 * triangular matrix.
 */
template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void identity_triangle(
    IndexType num_rows, const IndexType *__restrict__ row_ptrs,
    ValueType *__restrict__ vals, bool lower)
{
    const auto row = thread::get_thread_id_flat<IndexType>();
    if (row >= num_rows) {
        return;
    }
    const auto begin = row_ptrs[row];
    const auto end = row_ptrs[row + 1];
    // position of the diagonal entry within this row
    const auto diagonal_nz = lower ? end - 1 : begin;
    for (auto nz = begin; nz < end; ++nz) {
        vals[nz] = nz == diagonal_nz ? one<ValueType>() : zero<ValueType>();
    }
}


} // namespace kernel
6 changes: 6 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_ISAI_GENERATE_U_INVERSE_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_ISAI_IDENTITY_TRIANGLE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_ISAI_IDENTITY_TRIANGLE_KERNEL);


} // namespace isai

Expand Down
69 changes: 55 additions & 14 deletions core/preconditioner/isai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ namespace preconditioner {
namespace isai {


GKO_REGISTER_OPERATION(identity_triangle, isai::identity_triangle);
GKO_REGISTER_OPERATION(generate_l_inverse, isai::generate_l_inverse);
GKO_REGISTER_OPERATION(generate_u_inverse, isai::generate_u_inverse);

Expand All @@ -74,20 +75,20 @@ GKO_REGISTER_OPERATION(generate_u_inverse, isai::generate_u_inverse);
* otherwise, it will not be sorted.
*/
template <typename Csr>
std::unique_ptr<const Csr, std::function<void(const Csr *)>>
convert_to_csr_and_sort(std::shared_ptr<const Executor> &exec, const LinOp *mtx,
bool skip_sorting)
std::shared_ptr<const Csr> convert_to_csr_and_sort(
std::shared_ptr<const Executor> &exec, std::shared_ptr<const LinOp> mtx,
bool skip_sorting)
{
static_assert(
std::is_same<Csr, matrix::Csr<typename Csr::value_type,
typename Csr::index_type>>::value,
"The given `Csr` type must be of type `matrix::Csr`!");
if (skip_sorting && exec == mtx->get_executor()) {
auto csr_mtx = dynamic_cast<const Csr *>(mtx);
auto csr_mtx = std::dynamic_pointer_cast<const Csr>(mtx);
if (csr_mtx) {
// Here, we can just forward the pointer with an empty deleter
// since it is already sorted and in the correct format
return {csr_mtx, null_deleter<const Csr>{}};
return csr_mtx;
}
}
auto copy = Csr::create(exec);
Expand All @@ -97,39 +98,79 @@ convert_to_csr_and_sort(std::shared_ptr<const Executor> &exec, const LinOp *mtx,
if (!skip_sorting) {
copy->sort_by_column_index();
}
return {copy.release(), std::default_delete<const Csr>{}};
return {std::move(copy)};
}


/**
 * @internal
 *
 * Extends the sparsity pattern of the (sorted) triangular matrix M to that
 * of M^power without changing the values of M.
 *
 * This works by building a triangular "identity" matrix with M's sparsity
 * pattern, raising it to the (power - 1)-th power via repeated SpGEMM and
 * finally multiplying the result with M itself.
 *
 * The input matrix must be sorted and on the correct executor for this to
 * work. If `power` is 1, the matrix will be returned unchanged.
 */
template <typename Csr>
std::shared_ptr<const Csr> extend_sparsity(
    std::shared_ptr<const Executor> &exec, std::shared_ptr<const Csr> mtx,
    int power, bool lower)
{
    GKO_ASSERT_EQ(power >= 1, true);
    if (power == 1) {
        return mtx;
    }
    // triangular matrix with mtx's sparsity pattern, but identity values
    auto id = mtx->clone();
    exec->run(isai::make_identity_triangle(id.get(), lower));
    // accumulate id^(power - 1) by repeated multiplication
    // TODO replace this by a square-and-multiply algorithm
    auto id_power = id->clone();
    auto product = Csr::create(exec, mtx->get_size());
    for (int step = 2; step < power; ++step) {
        id->apply(id_power.get(), product.get());
        std::swap(id_power, product);
    }
    // the final multiplication id^(power - 1) * mtx yields the pattern of
    // mtx^power while preserving the original values of mtx
    id_power->apply(mtx.get(), product.get());
    return {std::move(product)};
}


template <isai_type IsaiType, typename ValueType, typename IndexType>
void Isai<IsaiType, ValueType, IndexType>::generate_l_inverse(
const LinOp *to_invert_l, bool skip_sorting)
std::shared_ptr<const LinOp> to_invert_l, bool skip_sorting, int power)
{
GKO_ASSERT_IS_SQUARE_MATRIX(to_invert_l);
auto exec = this->get_executor();
auto csr_l = convert_to_csr_and_sort<Csr>(exec, to_invert_l, skip_sorting);
const auto num_elems = csr_l->get_num_stored_elements();
auto strategy = csr_l->get_strategy();
auto csr_extended_l = extend_sparsity(exec, csr_l, power, true);
const auto num_elems = csr_extended_l->get_num_stored_elements();

std::shared_ptr<Csr> inverted_l =
Csr::create(exec, csr_l->get_size(), num_elems, csr_l->get_strategy());
exec->run(isai::make_generate_l_inverse(csr_l.get(), inverted_l.get()));
Csr::create(exec, csr_extended_l->get_size(), num_elems, strategy);
exec->run(
isai::make_generate_l_inverse(csr_extended_l.get(), inverted_l.get()));

approximate_inverse_ = std::move(inverted_l);
}


template <isai_type IsaiType, typename ValueType, typename IndexType>
void Isai<IsaiType, ValueType, IndexType>::generate_u_inverse(
const LinOp *to_invert_u, bool skip_sorting)
std::shared_ptr<const LinOp> to_invert_u, bool skip_sorting, int power)
{
GKO_ASSERT_IS_SQUARE_MATRIX(to_invert_u);
auto exec = this->get_executor();
auto csr_u = convert_to_csr_and_sort<Csr>(exec, to_invert_u, skip_sorting);
const auto num_elems = csr_u->get_num_stored_elements();
auto strategy = csr_u->get_strategy();
auto csr_extended_u = extend_sparsity(exec, csr_u, power, false);
const auto num_elems = csr_extended_u->get_num_stored_elements();

std::shared_ptr<Csr> inverted_u =
Csr::create(exec, csr_u->get_size(), num_elems, csr_u->get_strategy());
exec->run(isai::make_generate_u_inverse(csr_u.get(), inverted_u.get()));
Csr::create(exec, csr_extended_u->get_size(), num_elems, strategy);
exec->run(
isai::make_generate_u_inverse(csr_extended_u.get(), inverted_u.get()));

approximate_inverse_ = std::move(inverted_u);
}
Expand Down
8 changes: 7 additions & 1 deletion core/preconditioner/isai_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,17 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType> *u_csr, \
matrix::Csr<ValueType, IndexType> *inverse_u)

#define GKO_DECLARE_ISAI_IDENTITY_TRIANGLE_KERNEL(ValueType, IndexType) \
void identity_triangle(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Csr<ValueType, IndexType> *mtx, bool lower)

#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_ISAI_GENERATE_L_INVERSE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_ISAI_GENERATE_U_INVERSE_KERNEL(ValueType, IndexType)
GKO_DECLARE_ISAI_GENERATE_U_INVERSE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_ISAI_IDENTITY_TRIANGLE_KERNEL(ValueType, IndexType)


namespace omp {
Expand Down
37 changes: 37 additions & 0 deletions core/test/base/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ struct Derived : Base {};
struct NonRelated : Base {};


// Second polymorphic base class, unrelated to Base, used to exercise
// cross-casting between the bases of a multiply-derived type.
struct Base2 {
    virtual ~Base2() = default;
};


// Derives from two unrelated polymorphic bases (Base and Base2).
struct MultipleDerived : Base, Base2 {};


struct ClonableDerived : Base {
ClonableDerived(std::shared_ptr<const gko::Executor> exec = nullptr)
: executor(exec)
Expand Down Expand Up @@ -338,6 +346,35 @@ TEST(As, FailsToConvertConstSharedPtrIfNotRelated)
}


TEST(As, CanCrossCastUniquePtr)
{
    // cross-casting Base -> Base2 must preserve the original object
    std::unique_ptr<MultipleDerived> derived{new MultipleDerived{}};
    const auto raw = derived.get();

    auto as_base = gko::as<Base>(std::move(derived));
    auto as_base2 = gko::as<Base2>(std::move(as_base));

    ASSERT_EQ(gko::as<MultipleDerived>(std::move(as_base2)).get(), raw);
}


TEST(As, CanCrossCastSharedPtr)
{
    // round-trip through the second base must yield the same managed object
    auto derived = std::make_shared<MultipleDerived>();
    auto as_base = gko::as<Base>(derived);
    auto as_base2 = gko::as<Base2>(as_base);

    ASSERT_EQ(gko::as<MultipleDerived>(as_base2), as_base);
}


TEST(As, CanCrossCastConstSharedPtr)
{
    // same as the non-const case, but through pointers-to-const
    auto derived = std::make_shared<const MultipleDerived>();
    auto as_base = gko::as<const Base>(derived);
    auto as_base2 = gko::as<const Base2>(as_base);

    ASSERT_EQ(gko::as<const MultipleDerived>(as_base2), as_base);
}


struct DummyObject : gko::EnablePolymorphicObject<DummyObject>,
gko::EnablePolymorphicAssignment<DummyObject>,
gko::EnableCreateMethod<DummyObject> {
Expand Down
26 changes: 24 additions & 2 deletions core/test/preconditioner/isai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ TYPED_TEST(IsaiFactory, KnowsItsExecutor)
}


TYPED_TEST(IsaiFactory, SetsDefaultSkipSortingCorrectly)
TYPED_TEST(IsaiFactory, SetsSkipSortingCorrectly)
{
using LowerIsai = typename TestFixture::LowerIsai;
using UpperIsai = typename TestFixture::UpperIsai;
Expand All @@ -111,13 +111,35 @@ TYPED_TEST(IsaiFactory, SetsDefaultSkipSortingCorrectly)
}


TYPED_TEST(IsaiFactory, SetskipSortingCorrectly)
TYPED_TEST(IsaiFactory, SetsDefaultSkipSortingCorrectly)
{
ASSERT_EQ(this->lower_isai_factory->get_parameters().skip_sorting, false);
ASSERT_EQ(this->upper_isai_factory->get_parameters().skip_sorting, false);
}


TYPED_TEST(IsaiFactory, SetsSparsityPowerCorrectly)
{
    using LowerIsai = typename TestFixture::LowerIsai;
    using UpperIsai = typename TestFixture::UpperIsai;

    auto lower_factory =
        LowerIsai::build().with_sparsity_power(2).on(this->exec);
    auto upper_factory =
        UpperIsai::build().with_sparsity_power(2).on(this->exec);

    ASSERT_EQ(lower_factory->get_parameters().sparsity_power, 2);
    ASSERT_EQ(upper_factory->get_parameters().sparsity_power, 2);
}


TYPED_TEST(IsaiFactory, SetsDefaultSparsityPowerCorrectly)
{
    const auto lower_params = this->lower_isai_factory->get_parameters();
    const auto upper_params = this->upper_isai_factory->get_parameters();

    ASSERT_EQ(lower_params.sparsity_power, 1);
    ASSERT_EQ(upper_params.sparsity_power, 1);
}


TYPED_TEST(IsaiFactory, ThrowsWrongDimensionL)
{
using Csr = typename TestFixture::Csr;
Expand Down
15 changes: 13 additions & 2 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1018,8 +1018,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
template <typename ValueType, typename IndexType>
void is_sorted_by_column_index(
std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *to_check,
bool *is_sorted) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType> *to_check, bool *is_sorted)
{
*is_sorted = true;
auto cpu_array = Array<bool>::view(exec->get_master(), 1, is_sorted);
auto gpu_array = Array<bool>{exec, cpu_array};
auto block_size = default_block_size;
auto num_rows = static_cast<IndexType>(to_check->get_size()[0]);
auto num_blocks = ceildiv(num_rows, block_size);
kernel::check_unsorted<<<num_blocks, block_size>>>(
to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(),
num_rows, gpu_array.get_data());
cpu_array = gpu_array;
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX);
Expand Down
25 changes: 18 additions & 7 deletions cuda/preconditioner/isai_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,16 @@ namespace cuda {
* @ingroup isai
*/
namespace isai {
namespace {


#include "common/preconditioner/isai_kernels.hpp.inc"


} // namespace


constexpr int subwarp_size{config::warp_size};
constexpr int subwarps_per_block{2};
constexpr int default_block_size{subwarps_per_block * subwarp_size};


#include "common/preconditioner/isai_kernels.hpp.inc"


template <typename ValueType, typename IndexType>
void generate_l_inverse(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType> *l_csr,
Expand Down Expand Up @@ -130,6 +126,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_ISAI_GENERATE_U_INVERSE_KERNEL);


/**
 * @internal
 *
 * Overwrites the values of the triangular CSR matrix `mtx` with those of the
 * identity matrix (ones on the diagonal, zeros elsewhere), keeping its
 * sparsity pattern unchanged.
 *
 * @param exec   the executor the kernel runs on
 * @param mtx    the triangular matrix to overwrite; must have sorted columns
 * @param lower  true if `mtx` is lower triangular, false if upper triangular
 */
template <typename ValueType, typename IndexType>
void identity_triangle(std::shared_ptr<const DefaultExecutor> exec,
                       matrix::Csr<ValueType, IndexType> *mtx, bool lower)
{
    auto num_rows = mtx->get_size()[0];
    // guard against an empty matrix: launching with a grid size of zero
    // would be an invalid launch configuration
    if (num_rows == 0) {
        return;
    }
    auto num_blocks = ceildiv(num_rows, default_block_size);
    kernel::identity_triangle<<<num_blocks, default_block_size>>>(
        static_cast<IndexType>(num_rows), mtx->get_const_row_ptrs(),
        as_cuda_type(mtx->get_values()), lower);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
    GKO_DECLARE_ISAI_IDENTITY_TRIANGLE_KERNEL);


} // namespace isai
} // namespace cuda
} // namespace kernels
Expand Down
Loading

0 comments on commit 90a0e5c

Please sign in to comment.