Skip to content

Commit

Permalink
Review updates.
Browse files Browse the repository at this point in the history
+ Fix the SolveStruct namespace clarification.
+ Add a proper free for the workspace.
+ some doc clarifications.
  • Loading branch information
pratikvn committed Sep 6, 2019
1 parent 90b6f0a commit f1fceba
Show file tree
Hide file tree
Showing 10 changed files with 37 additions and 35 deletions.
6 changes: 6 additions & 0 deletions core/solver/lower_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ void LowerTrs<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const

auto dense_b = as<const Vector>(b);
auto dense_x = as<Vector>(x);

// This kernel checks if a transpose is needed for the multiple rhs case.
// Currently only the algorithm for CUDA version <=9.1 needs this
// transposition due to the limitation in the cusparse algorithm. The other
// executors (omp and reference) do not use the transpose (trans_x and
// trans_b) and hence are passed in empty pointers.
bool do_transpose = false;
std::shared_ptr<Vector> trans_b;
std::shared_ptr<Vector> trans_x;
Expand Down
6 changes: 3 additions & 3 deletions core/solver/lower_trs_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,20 @@ namespace lower_trs {

#define GKO_DECLARE_LOWER_TRS_INIT_STRUCT_KERNEL() \
void init_struct(std::shared_ptr<const DefaultExecutor> exec, \
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
std::shared_ptr<solver::SolveStruct> &solve_struct)


#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype) \
void generate(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype> *matrix, \
gko::solver::SolveStruct *solve_struct, \
solver::SolveStruct *solve_struct, \
const gko::size_type num_rhs)


#define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(_vtype, _itype) \
void solve(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype> *matrix, \
const gko::solver::SolveStruct *solve_struct, \
const solver::SolveStruct *solve_struct, \
matrix::Dense<_vtype> *trans_b, matrix::Dense<_vtype> *trans_x, \
const matrix::Dense<_vtype> *b, matrix::Dense<_vtype> *x)

Expand Down
1 change: 0 additions & 1 deletion core/test/solver/lower_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include <memory>
#include <typeinfo>


#include <gtest/gtest.h>
Expand Down
17 changes: 6 additions & 11 deletions cuda/base/cusparse_bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct SolveStruct {
void *factor_work_vec;
SolveStruct()
{
factor_work_vec = nullptr;
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateMatDescr(&factor_descr));
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseSetMatIndexBase(factor_descr, CUSPARSE_INDEX_BASE_ZERO));
Expand All @@ -71,8 +72,8 @@ struct SolveStruct {
algorithm = 0;
policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
}
SolveStruct(const SolveStruct &) {}
SolveStruct(SolveStruct &&) {}
SolveStruct(const SolveStruct &) : SolveStruct() {}
SolveStruct(SolveStruct &&) : SolveStruct() {}
SolveStruct &operator=(const SolveStruct &) { return *this; }
SolveStruct &operator=(SolveStruct &&) { return *this; }
~SolveStruct()
Expand All @@ -83,6 +84,7 @@ struct SolveStruct {
}
if (factor_work_vec != nullptr) {
cudaFree(factor_work_vec);
factor_work_vec = nullptr;
}
}
};
Expand All @@ -106,8 +108,8 @@ struct SolveStruct {
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseSetMatDiagType(factor_descr, CUSPARSE_DIAG_TYPE_NON_UNIT));
}
SolveStruct(const SolveStruct &) {}
SolveStruct(SolveStruct &&) {}
SolveStruct(const SolveStruct &) : SolveStruct() {}
SolveStruct(SolveStruct &&) : SolveStruct() {}
SolveStruct &operator=(const SolveStruct &) { return *this; }
SolveStruct &operator=(SolveStruct &&) { return *this; }
~SolveStruct()
Expand Down Expand Up @@ -571,13 +573,6 @@ inline void destroy(cusparseMatDescr_t descr)
}


inline gko::solver::SolveStruct *init_trs_solve_struct()
{
gko::solver::SolveStruct *solve_struct = new gko::solver::SolveStruct{};
return solve_struct;
}


// CUDA versions 9.2 and above have csrsm2.
#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))

Expand Down
14 changes: 8 additions & 6 deletions cuda/solver/lower_trs_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,17 @@ void should_perform_transpose(std::shared_ptr<const CudaExecutor> exec,


void init_struct(std::shared_ptr<const CudaExecutor> exec,
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
std::shared_ptr<solver::SolveStruct> &solve_struct)
{
solve_struct = std::shared_ptr<gko::solver::SolveStruct>(
new gko::solver::SolveStruct());
solve_struct =
std::shared_ptr<gko::solver::SolveStruct>(new solver::SolveStruct());
}


template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
gko::solver::SolveStruct *solve_struct,
const gko::size_type num_rhs)
solver::SolveStruct *solve_struct, const gko::size_type num_rhs)
{
if (cusparse::is_supported<ValueType, IndexType>::value) {
auto handle = exec->get_cusparse_handle();
Expand All @@ -120,6 +119,9 @@ void generate(std::shared_ptr<const CudaExecutor> exec,
&solve_struct->factor_work_size);

// allocate workspace
if (solve_struct->factor_work_vec != nullptr) {
GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(solve_struct->factor_work_vec));
}
solve_struct->factor_work_vec =
exec->alloc<void *>(solve_struct->factor_work_size);

Expand Down Expand Up @@ -164,7 +166,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
template <typename ValueType, typename IndexType>
void solve(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
const gko::solver::SolveStruct *solve_struct,
const solver::SolveStruct *solve_struct,
matrix::Dense<ValueType> *trans_b, matrix::Dense<ValueType> *trans_x,
const matrix::Dense<ValueType> *b, matrix::Dense<ValueType> *x)
{
Expand Down
4 changes: 1 addition & 3 deletions cuda/test/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <random>


#include <gtest/gtest.h>


#include <cuda.h>
#include <gtest/gtest.h>


#include <ginkgo/core/base/exception.hpp>
Expand Down
9 changes: 7 additions & 2 deletions include/ginkgo/core/solver/lower_trs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ struct SolveStruct;
* format. If the matrix is not in CSR, then the generate step converts it into
* a CSR matrix. The generation fails if the matrix is not convertible to CSR.
*
* @note As the constructor uses the copy and convert functionality, it is not
* possible to create a empty solver or a solver with a matrix in any other
* format other than CSR, if none of the executor modules are being compiled
* with.
*
* @tparam ValueType precision of matrix elements
* @tparam IndexType precision of matrix indices
*
Expand Down Expand Up @@ -111,7 +116,7 @@ class LowerTrs : public EnableLinOp<LowerTrs<ValueType, IndexType>>,
/**
* Number of right hand sides.
*
* @note: This value is currently a dummy value which is not used by the
* @note This value is currently a dummy value which is not used by the
* analysis step. It is possible that future algorithms (cusparse
* csrsm2) make use of the number of right hand sides for a more
* sophisticated implementation. Hence this parameter is left here. But
Expand Down Expand Up @@ -173,7 +178,7 @@ class LowerTrs : public EnableLinOp<LowerTrs<ValueType, IndexType>>,
private:
std::shared_ptr<const matrix::Csr<ValueType, IndexType>> system_matrix_{};
std::shared_ptr<const LinOp> preconditioner_{};
std::shared_ptr<gko::solver::SolveStruct> solve_struct_;
std::shared_ptr<solver::SolveStruct> solve_struct_;
};


Expand Down
7 changes: 3 additions & 4 deletions omp/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void should_perform_transpose(std::shared_ptr<const OmpExecutor> exec,


void init_struct(std::shared_ptr<const OmpExecutor> exec,
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
std::shared_ptr<solver::SolveStruct> &solve_struct)
{
// This init kernel is here to allow initialization of the solve struct for
// a more sophisticated implementation as for other executors.
Expand All @@ -77,8 +77,7 @@ void init_struct(std::shared_ptr<const OmpExecutor> exec,
template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
gko::solver::SolveStruct *solve_struct,
const gko::size_type num_rhs)
solver::SolveStruct *solve_struct, const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand All @@ -92,7 +91,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
template <typename ValueType, typename IndexType>
void solve(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
const gko::solver::SolveStruct *solve_struct,
const solver::SolveStruct *solve_struct,
matrix::Dense<ValueType> *trans_b, matrix::Dense<ValueType> *trans_x,
const matrix::Dense<ValueType> *b, matrix::Dense<ValueType> *x)
{
Expand Down
7 changes: 3 additions & 4 deletions reference/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void should_perform_transpose(std::shared_ptr<const ReferenceExecutor> exec,


void init_struct(std::shared_ptr<const ReferenceExecutor> exec,
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
std::shared_ptr<solver::SolveStruct> &solve_struct)
{
// This init kernel is here to allow initialization of the solve struct for
// a more sophisticated implementation as for other executors.
Expand All @@ -73,8 +73,7 @@ void init_struct(std::shared_ptr<const ReferenceExecutor> exec,
template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
gko::solver::SolveStruct *solve_struct,
const gko::size_type num_rhs)
solver::SolveStruct *solve_struct, const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand All @@ -88,7 +87,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
template <typename ValueType, typename IndexType>
void solve(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *matrix,
const gko::solver::SolveStruct *solve_struct,
const solver::SolveStruct *solve_struct,
matrix::Dense<ValueType> *trans_b, matrix::Dense<ValueType> *trans_x,
const matrix::Dense<ValueType> *b, matrix::Dense<ValueType> *x)
{
Expand Down
1 change: 0 additions & 1 deletion reference/test/solver/lower_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include <memory>
#include <typeinfo>


#include <gtest/gtest.h>
Expand Down

0 comments on commit f1fceba

Please sign in to comment.