Fix the automatical strategy when shared.
The automatical strategy has an issue when shared: it behaves as the
wrong strategy when the matrices sharing it have different properties.
This commit fixes the issue purely by extending the current interface,
without changing any existing function. In effect:
+ Add a new virtual `copy` function to the strategies in order to create
  a new shared pointer polymorphically (sketched below).
+ The approach used is to always copy the strategy whenever a CSR matrix
  is instantiated or copied. Thankfully, the strategies are currently
  lightweight objects, so this should add little overhead.
+ Add tests which ensure that matrices with different properties work
  correctly with the automatical strategy.

Fixes #426
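
Below is a minimal, self-contained sketch of the polymorphic copy
("virtual clone") pattern described in the first bullet above. The names
`strategy`, `my_strategy`, and `holder` are illustrative stand-ins, not
the actual Ginkgo types:

    #include <memory>

    // The base class exposes a virtual copy() so that code holding a
    // std::shared_ptr<strategy> can request an independent clone without
    // knowing the concrete type.
    class strategy {
    public:
        virtual ~strategy() = default;
        virtual std::shared_ptr<strategy> copy() = 0;
    };

    class my_strategy : public strategy {
    public:
        explicit my_strategy(int threshold) : threshold_(threshold) {}

        // Each derived class clones itself, carrying over its own state.
        std::shared_ptr<strategy> copy() override
        {
            return std::make_shared<my_strategy>(threshold_);
        }

    private:
        int threshold_;
    };

    // A matrix-like holder copies the strategy it is given, so two holders
    // constructed from the same shared_ptr never mutate a common instance.
    class holder {
    public:
        explicit holder(std::shared_ptr<strategy> s) : strategy_(s->copy()) {}

    private:
        std::shared_ptr<strategy> strategy_;
    };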
tcojean committed Jun 5, 2020
1 parent 36cb424 commit f768210
Showing 4 changed files with 124 additions and 15 deletions.
8 changes: 6 additions & 2 deletions core/test/matrix/csr_builder.cpp
@@ -90,11 +90,15 @@ TYPED_TEST(CsrBuilder, UpdatesSrowOnDestruction)
     using index_type = typename TestFixture::index_type;
     struct mock_strategy : public Mtx::strategy_type {
         virtual void process(const gko::Array<index_type> &,
-                             gko::Array<index_type> *)
+                             gko::Array<index_type> *) override
         {
             *was_called = true;
         }
-        virtual int64_t clac_size(const int64_t nnz) { return 0; }
+        virtual int64_t clac_size(const int64_t nnz) override { return 0; }
+        virtual std::shared_ptr<typename Mtx::strategy_type> copy() override
+        {
+            return std::make_shared<mock_strategy>(*was_called);
+        }
 
         mock_strategy(bool &flag) : Mtx::strategy_type(""), was_called(&flag) {}
 
25 changes: 25 additions & 0 deletions cuda/test/matrix/csr_kernels.cpp
@@ -664,4 +664,29 @@ TEST_F(Csr, SortUnsortedMatrixIsEquivalentToRef)
 }
 
 
+TEST_F(Csr, OneAutomaticalWorksWithDifferentMatrices)
+{
+    auto row_len_limit = std::max(Mtx::automatical::nvidia_row_len_limit,
+                                  Mtx::automatical::amd_row_len_limit);
+    auto load_balance_mtx = Mtx::create(ref);
+    auto classical_mtx = Mtx::create(ref);
+    load_balance_mtx->copy_from(
+        gen_mtx<Vec>(1, row_len_limit + 1000, row_len_limit + 1));
+    classical_mtx->copy_from(gen_mtx<Vec>(50, 50, 1));
+    auto load_balance_mtx_d = Mtx::create(cuda);
+    auto classical_mtx_d = Mtx::create(cuda);
+    load_balance_mtx_d->copy_from(load_balance_mtx.get());
+    classical_mtx_d->copy_from(classical_mtx.get());
+    auto automatical = std::make_shared<Mtx::automatical>();
+
+    load_balance_mtx_d->set_strategy(automatical);
+    classical_mtx_d->set_strategy(automatical);
+
+    EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name());
+    EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name());
+    ASSERT_NE(load_balance_mtx_d->get_strategy().get(),
+              classical_mtx_d->get_strategy().get());
+}
+
+
 }  // namespace
25 changes: 25 additions & 0 deletions hip/test/matrix/csr_kernels.hip.cpp
@@ -638,4 +638,29 @@ TEST_F(Csr, SortUnsortedMatrixIsEquivalentToRef)
 }
 
 
+TEST_F(Csr, OneAutomaticalWorksWithDifferentMatrices)
+{
+    auto load_balance_mtx = Mtx::create(ref);
+    auto classical_mtx = Mtx::create(ref);
+    auto row_len_limit = std::max(Mtx::automatical::nvidia_row_len_limit,
+                                  Mtx::automatical::amd_row_len_limit);
+    load_balance_mtx->copy_from(
+        gen_mtx<Vec>(1, row_len_limit + 1000, row_len_limit + 1));
+    classical_mtx->copy_from(gen_mtx<Vec>(50, 50, 1));
+    auto load_balance_mtx_d = Mtx::create(hip);
+    auto classical_mtx_d = Mtx::create(hip);
+    load_balance_mtx_d->copy_from(load_balance_mtx.get());
+    classical_mtx_d->copy_from(classical_mtx.get());
+    auto automatical = std::make_shared<Mtx::automatical>();
+
+    load_balance_mtx_d->set_strategy(automatical);
+    classical_mtx_d->set_strategy(automatical);
+
+    EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name());
+    EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name());
+    ASSERT_NE(load_balance_mtx_d->get_strategy().get(),
+              classical_mtx_d->get_strategy().get());
+}
+
+
 }  // namespace
81 changes: 68 additions & 13 deletions include/ginkgo/core/matrix/csr.hpp
@@ -166,6 +166,12 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
          */
         virtual int64_t clac_size(const int64_t nnz) = 0;
 
+        /**
+         * Copy a strategy. This is a workaround until strategies are revamped,
+         * since strategies like `automatical` do not work when actually shared.
+         */
+        virtual std::shared_ptr<strategy_type> copy() = 0;
+
     protected:
         void set_name(std::string name) { name_ = name; }
 
@@ -215,6 +221,11 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
             return max_length_per_row_;
         }
 
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<classical>();
+        }
+
     private:
         index_type max_length_per_row_;
     };
@@ -236,6 +247,11 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
         {}
 
         int64_t clac_size(const int64_t nnz) override { return 0; }
+
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<merge_path>();
+        }
     };
 
     /**
@@ -256,6 +272,11 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
         {}
 
         int64_t clac_size(const int64_t nnz) override { return 0; }
+
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<cusparse>();
+        }
     };
 
     /**
@@ -275,6 +296,11 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
         {}
 
         int64_t clac_size(const int64_t nnz) override { return 0; }
+
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<sparselib>();
+        }
     };
 
     /**
@@ -406,6 +432,12 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
             }
         }
 
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<load_balance>(nwarps_, warp_size_,
+                                                  cuda_strategy_);
+        }
+
     private:
         int64_t nwarps_;
         int warp_size_;
@@ -414,16 +446,29 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
 
     class automatical : public strategy_type {
     public:
+        /* Use the imbalance strategy when the maximum number of nonzeros
+         * per row is more than 1024 on NVIDIA hardware */
+        static constexpr index_type nvidia_row_len_limit = 1024;
+        /* Use the imbalance strategy when the matrix has more than 1e6
+         * stored elements on NVIDIA hardware */
+        static constexpr index_type nvidia_nnz_limit = 1e6;
+        /* Use the imbalance strategy when the maximum number of nonzeros
+         * per row is more than 768 on AMD hardware */
+        static constexpr index_type amd_row_len_limit = 768;
+        /* Use the imbalance strategy when the matrix has more than 1e8
+         * stored elements on AMD hardware */
+        static constexpr index_type amd_nnz_limit = 1e8;
+
         /**
-         * Creates a automatical strategy.
+         * Creates an automatical strategy.
          */
         automatical()
             : automatical(std::move(
                   gko::CudaExecutor::create(0, gko::OmpExecutor::create())))
         {}
 
         /**
-         * Creates a automatical strategy with CUDA executor.
+         * Creates an automatical strategy with CUDA executor.
          *
         * @param exec the CUDA executor
          */
@@ -432,7 +477,7 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
         {}
 
         /**
-         * Creates a automatical strategy with HIP executor.
+         * Creates an automatical strategy with HIP executor.
          *
         * @param exec the HIP executor
          */
@@ -441,7 +486,7 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
         {}
 
         /**
-         * Creates a automatical strategy with specified parameters
+         * Creates an automatical strategy with specified parameters
          *
          * @param nwarps the number of warps in the executor
          * @param warp_size the warp size of the executor
@@ -466,14 +511,12 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
             // if the number of stored elements is larger than <nnz_limit> or
             // the maximum number of stored elements per row is larger than
             // <row_len_limit>, use load_balance otherwise use classical
-            // CUDA: nnz_limit = 1e6, row_len_limit = 1024
-            // AMD: nnz_limit = 1e8, row_len_limit = 768
-            index_type nnz_limit = 1e6;
-            index_type row_len_limit = 1024;
+            index_type nnz_limit = nvidia_nnz_limit;
+            index_type row_len_limit = nvidia_row_len_limit;
 #if GINKGO_HIP_PLATFORM_HCC
             if (!cuda_strategy_) {
-                nnz_limit = 1e8;
-                row_len_limit = 768;
+                nnz_limit = amd_nnz_limit;
+                row_len_limit = amd_row_len_limit;
             }
 #endif // GINKGO_HIP_PLATFORM_HCC
             auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
@@ -539,6 +582,12 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
             return max_length_per_row_;
         }
 
+        std::shared_ptr<strategy_type> copy() override
+        {
+            return std::make_shared<automatical>(nwarps_, warp_size_,
+                                                 cuda_strategy_);
+        }
+
     private:
         int64_t nwarps_;
         int warp_size_;
@@ -549,7 +598,13 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
     void convert_to(Csr<ValueType, IndexType> *result) const override
     {
         bool same_executor = this->get_executor() == result->get_executor();
-        EnableLinOp<Csr>::convert_to(result);
+        // NOTE: as soon as strategies are improved, this can be reverted
+        result->values_ = this->values_;
+        result->col_idxs_ = this->col_idxs_;
+        result->row_ptrs_ = this->row_ptrs_;
+        result->srow_ = this->srow_;
+        result->strategy_ = std::move(this->get_strategy()->copy());
+        // END NOTE
         if (!same_executor) {
             detail::strategy_rebuild_helper(result);
         }
@@ -771,7 +826,7 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
           col_idxs_(exec, num_nonzeros),
           row_ptrs_(exec, size[0] + 1),
           srow_(exec, strategy->clac_size(num_nonzeros)),
-          strategy_(std::move(strategy))
+          strategy_(std::move(strategy->copy()))
     {}
 
     /**
@@ -804,7 +859,7 @@ class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
           col_idxs_{exec, std::forward<ColIdxsArray>(col_idxs)},
           row_ptrs_{exec, std::forward<RowPtrsArray>(row_ptrs)},
           srow_(exec),
-          strategy_(std::move(strategy))
+          strategy_(std::move(strategy->copy()))
     {
         GKO_ASSERT_EQ(values_.get_num_elems(), col_idxs_.get_num_elems());
         GKO_ASSERT_EQ(this->get_size()[0] + 1, row_ptrs_.get_num_elems());
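
For reference, the selection heuristic that automatical::process applies
(see the csr.hpp hunk above) boils down to the following stand-alone
sketch; `use_load_balance` is a hypothetical helper name, and the
thresholds are the constants introduced by this commit:

    #include <cstdint>

    // Returns true when the heuristic from automatical::process would pick
    // the load_balance strategy; otherwise the classical strategy is used.
    // nvidia_nnz_limit = 1e6, nvidia_row_len_limit = 1024,
    // amd_nnz_limit = 1e8, amd_row_len_limit = 768.
    bool use_load_balance(std::int64_t nnz, std::int64_t max_row_len,
                          bool cuda_strategy)
    {
        const std::int64_t nnz_limit = cuda_strategy ? 1000000 : 100000000;
        const std::int64_t row_len_limit = cuda_strategy ? 1024 : 768;
        return nnz > nnz_limit || max_row_len > row_len_limit;
    }

This is why the new tests expect "load_balance" for the single-row matrix
whose row length exceeds row_len_limit and "classical" for the small
50 x 50 matrix.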
