diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fb62494135d..eaf638e04d3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -56,6 +56,8 @@ stages: - | (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - ctest -V + - make install + - make test_install dependencies: [] except: - schedules diff --git a/CHANGELOG.md b/CHANGELOG.md index e91de8b135d..32a05209929 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,23 @@ commits. For a comprehensive list, use the following command: git log --first-parent ``` +## Version 1.1.1 +This version of Ginkgo provides a few fixes in Ginkgo's core routines. The +supported systems and requirements are unchanged from version 1.1.0. + +### Fixes ++ Improve Ginkgo's installation and fix the `test_install` step ([#406](https://github.com/ginkgo-project/ginkgo/pull/406)), ++ Fix some documentation issues ([#406](https://github.com/ginkgo-project/ginkgo/pull/406)), ++ Fix multiple code issues reported by sonarqube ([#406](https://github.com/ginkgo-project/ginkgo/pull/406)), ++ Update the git-cmake-format repository ([#399](https://github.com/ginkgo-project/ginkgo/pull/399)), ++ Improve the global update header script ([#390](https://github.com/ginkgo-project/ginkgo/pull/390)), ++ Fix broken bounds checks ([#388](https://github.com/ginkgo-project/ginkgo/pull/388)), ++ Fix CSR strategies and improve performance ([#379](https://github.com/ginkgo-project/ginkgo/pull/379)), ++ Fix a small typo in the stencil examples ([#381](https://github.com/ginkgo-project/ginkgo/pull/381)), ++ Fix ELL error on small matrices ([#375](https://github.com/ginkgo-project/ginkgo/pull/375)), ++ Fix SellP read function ([#374](https://github.com/ginkgo-project/ginkgo/pull/374)), ++ Add factorization support in `create_new_algorithm.sh` ([#371](https://github.com/ginkgo-project/ginkgo/pull/371)) + ## Version 1.1.0 The Ginkgo team is proud to announce the new minor release of Ginkgo version diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c94059125f..6e9af2bdd07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.9) -project(Ginkgo LANGUAGES C CXX VERSION 1.1.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") +project(Ginkgo LANGUAGES C CXX VERSION 1.1.1 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") set(Ginkgo_VERSION_TAG "master") set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG}) @@ -160,8 +160,9 @@ if(GINKGO_DEVEL_TOOLS) endif() # Generate the global `ginkgo/ginkgo.hpp` header with every call of make +# when bash is present and the developer tools are enabled find_program(BASH bash) -if(NOT "${BASH}" STREQUAL "BASH-NOTFOUND") +if(NOT "${BASH}" STREQUAL "BASH-NOTFOUND" AND GINKGO_DEVEL_TOOLS) add_custom_target(generate_ginkgo_header ALL COMMAND ${Ginkgo_SOURCE_DIR}/dev_tools/scripts/update_ginkgo_header.sh WORKING_DIRECTORY ${Ginkgo_SOURCE_DIR}) @@ -187,6 +188,7 @@ add_custom_target(test_install COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} -H${Ginkgo_SOURCE_DIR}/test_install -B${Ginkgo_BINARY_DIR}/test_install -DCMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_CONFIG_DIR} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} COMMAND ${CMAKE_COMMAND} --build ${Ginkgo_BINARY_DIR}/test_install COMMAND ${Ginkgo_BINARY_DIR}/test_install/test_install COMMENT "Running a test on the installed binaries. This requires running `(sudo) make install` first.") diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 1db5cbbffa4..e944ff6e933 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -61,6 +61,8 @@ set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) set(GINKGO_CUDA_ARCHITECTURES @GINKGO_CUDA_ARCHITECTURES@) set(GINKGO_CUDA_HOST_COMPILER @CMAKE_CUDA_HOST_COMPILER@) +set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) + # Ginkgo external package variables set(GINKGO_USE_EXTERNAL_CAS "@GINKGO_USE_EXTERNAL_CAS@") set(GINKGO_USE_EXTERNAL_GTEST "@GINKGO_USE_EXTERNAL_GTEST@") @@ -80,10 +82,13 @@ set(TPL_RAPIDJSON_INCLUDE_DIRS "@TPL_RAPIDJSON_INCLUDE_DIRS@") # Ginkgo installation configuration set(GINKGO_CONFIG_FILE_PATH "${CMAKE_CURRENT_LIST_DIR}") string(REPLACE "@GINKGO_INSTALL_CONFIG_DIR@" "" GINKGO_INSTALL_PREFIX "${GINKGO_CONFIG_FILE_PATH}") -set(GINKGO_INSTALL_INCLUDE_DIR "@GINKGO_INSTALL_INCLUDE_DIR@") -set(GINKGO_INSTALL_LIBRARY_DIR "@GINKGO_INSTALL_LIBRARY_DIR@") -set(GINKGO_INSTALL_PKGCONFIG_DIR "@GINKGO_INSTALL_PKGCONFIG_DIR@") -set(GINKGO_INSTALL_CONFIG_DIR "@GINKGO_INSTALL_CONFIG_DIR@") +set(GINKGO_INSTALL_INCLUDE_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_INCLUDE_DIR@") +set(GINKGO_INSTALL_LIBRARY_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_LIBRARY_DIR@") +set(GINKGO_INSTALL_PKGCONFIG_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_PKGCONFIG_DIR@") +set(GINKGO_INSTALL_CONFIG_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_CONFIG_DIR@") +set(GINKGO_INSTALL_MODULE_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_MODULE_DIR@") +set(CMAKE_MODULE_PATH "${GINKGO_INSTALL_MODULE_DIR}") + set(GINKGO_INTERFACE_LINK_LIBRARIES "@GINKGO_INTERFACE_LINK_LIBRARIES@") set(GINKGO_INTERFACE_LINK_FLAGS "@GINKGO_INTERFACE_LINK_FLAGS@") @@ -111,6 +116,8 @@ set(GINKGO_OPENMP_LIBRARIES @OpenMP_CXX_LIBRARIES@) set(GINKGO_OPENMP_FLAGS "@OpenMP_CXX_FLAGS@") +# Modulepath configuration + # NOTE: we do not export benchmarks, examples, tests or devel tools # so `third_party` libraries are currently unneeded. diff --git a/cmake/Modules/FindPAPI.cmake b/cmake/Modules/FindPAPI.cmake index 15f5b5cd38a..3e16af2e125 100644 --- a/cmake/Modules/FindPAPI.cmake +++ b/cmake/Modules/FindPAPI.cmake @@ -68,30 +68,31 @@ if(PAPI_INCLUDE_DIR) if (PAPI_LIBRARY) # find the components - foreach(component IN LISTS PAPI_FIND_COMPONENTS) - file(WRITE "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" - " - #include - int main() { - int retval; - retval = PAPI_library_init(PAPI_VER_CURRENT); - if (retval != PAPI_VER_CURRENT && retval > 0) - return -1; - if (PAPI_get_component_index(\"${component}\") < 0) - return 0; - return 1; - } - ") - try_run(PAPI_${component}_FOUND - gko_result_unused - "${CMAKE_BINARY_DIR}" - "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" - LINK_LIBRARIES ${PAPI_LIBRARY} - ) + enable_language(C) + foreach(component IN LISTS PAPI_FIND_COMPONENTS) + file(WRITE "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" + " + #include + int main() { + int retval; + retval = PAPI_library_init(PAPI_VER_CURRENT); + if (retval != PAPI_VER_CURRENT && retval > 0) + return -1; + if (PAPI_get_component_index(\"${component}\") < 0) + return 0; + return 1; + }" + ) + try_run(PAPI_${component}_FOUND + gko_result_unused + "${CMAKE_BINARY_DIR}" + "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" + LINK_LIBRARIES ${PAPI_LIBRARY} + ) - if (NOT PAPI_${component}_FOUND EQUAL 1) - unset(PAPI_${component}_FOUND) - endif() + if (NOT PAPI_${component}_FOUND EQUAL 1) + unset(PAPI_${component}_FOUND) + endif() endforeach() endif() endif() diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index 76125a761a6..8a8ad047d56 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -94,4 +94,4 @@ endmacro() macro(ginkgo_switch_to_windows_dynamic lang) ginkgo_switch_windows_link(${lang} "MT" "MD") -endmacro() \ No newline at end of file +endmacro() diff --git a/cmake/install_helpers.cmake b/cmake/install_helpers.cmake index 1b9f73b1c69..fd0c90d383f 100644 --- a/cmake/install_helpers.cmake +++ b/cmake/install_helpers.cmake @@ -6,6 +6,7 @@ set(GINKGO_INSTALL_INCLUDE_DIR "include") set(GINKGO_INSTALL_LIBRARY_DIR "lib") set(GINKGO_INSTALL_PKGCONFIG_DIR "lib/pkgconfig") set(GINKGO_INSTALL_CONFIG_DIR "lib/cmake/Ginkgo") +set(GINKGO_INSTALL_MODULE_DIR "lib/cmake/Ginkgo/Modules") function(ginkgo_install_library name subdir) # install .so and .a files @@ -33,6 +34,9 @@ function(ginkgo_install) install(FILES "${Ginkgo_SOURCE_DIR}/third_party/papi_sde/papi_sde_interface.h" DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}/third_party/papi_sde" ) + install(FILES "${Ginkgo_SOURCE_DIR}/cmake/Modules/FindPAPI.cmake" + DESTINATION "${GINKGO_INSTALL_MODULE_DIR}/" + ) endif() # export targets diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index 77ed59ca9a0..884b85425b3 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -43,9 +43,9 @@ namespace gko { version version_info::get_cuda_version() noexcept { - // We just return 1.1.0 with a special "not compiled" tag in placeholder - // modules. - return {1, 1, 0, "not compiled"}; + // We just return the version with a special "not compiled" tag in + // placeholder modules. + return {GKO_VERSION_STR, "not compiled"}; } diff --git a/core/device_hooks/omp_hooks.cpp b/core/device_hooks/omp_hooks.cpp index fdf598ab5eb..4fb251758a8 100644 --- a/core/device_hooks/omp_hooks.cpp +++ b/core/device_hooks/omp_hooks.cpp @@ -38,9 +38,9 @@ namespace gko { version version_info::get_omp_version() noexcept { - // We just return 1.1.0 with a special "not compiled" tag in placeholder - // modules. - return {1, 1, 0, "not compiled"}; + // We just return the version with a special "not compiled" tag in + // placeholder modules. + return {GKO_VERSION_STR, "not compiled"}; } diff --git a/core/device_hooks/reference_hooks.cpp b/core/device_hooks/reference_hooks.cpp index 31e51fd0d27..7e7ab287ca5 100644 --- a/core/device_hooks/reference_hooks.cpp +++ b/core/device_hooks/reference_hooks.cpp @@ -39,9 +39,9 @@ namespace gko { version version_info::get_reference_version() noexcept { - // We just return 1.1.0 with a special "not compiled" tag in placeholder - // modules. - return {1, 1, 0, "not compiled"}; + // We just return the version with a special "not compiled" tag in + // placeholder modules. + return {GKO_VERSION_STR, "not compiled"}; } diff --git a/core/devices/cuda/executor.cpp b/core/devices/cuda/executor.cpp index 0be14ce9fe4..b377b2afa94 100644 --- a/core/devices/cuda/executor.cpp +++ b/core/devices/cuda/executor.cpp @@ -48,7 +48,7 @@ std::shared_ptr CudaExecutor::get_master() const noexcept } -int CudaExecutor::num_execs[max_devices]; +unsigned CudaExecutor::num_execs[max_devices]; std::mutex CudaExecutor::mutex[max_devices]; diff --git a/core/matrix/coo.cpp b/core/matrix/coo.cpp index a9dddadf5ab..73d001d6dba 100644 --- a/core/matrix/coo.cpp +++ b/core/matrix/coo.cpp @@ -108,10 +108,13 @@ void Coo::convert_to( Csr *result) const { auto exec = this->get_executor(); - auto tmp = Csr::create(exec, this->get_size()); + auto tmp = Csr::create( + exec, this->get_size(), this->get_num_stored_elements(), + result->get_strategy()); tmp->values_ = this->values_; tmp->col_idxs_ = this->col_idxs_; exec->run(coo::make_convert_to_csr(tmp.get(), this)); + tmp->make_srow(); tmp->move_to(result); } @@ -120,10 +123,13 @@ template void Coo::move_to(Csr *result) { auto exec = this->get_executor(); - auto tmp = Csr::create(exec, this->get_size()); + auto tmp = Csr::create( + exec, this->get_size(), this->get_num_stored_elements(), + result->get_strategy()); tmp->values_ = std::move(this->values_); tmp->col_idxs_ = std::move(this->col_idxs_); exec->run(coo::make_convert_to_csr(tmp.get(), this)); + tmp->make_srow(); tmp->move_to(result); } diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 72c23343857..b284593d21a 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -108,12 +108,21 @@ inline void conversion_helper(Csr *result, { auto exec = source->get_executor(); - size_type num_stored_nonzeros = 0; - exec->run(dense::make_count_nonzeros(source, &num_stored_nonzeros)); - auto tmp = Csr::create( - exec, source->get_size(), num_stored_nonzeros, result->get_strategy()); - exec->run(op(tmp.get(), source)); - tmp->move_to(result); + if (source->get_size()) { + size_type num_stored_nonzeros = 0; + exec->run(dense::make_count_nonzeros(source, &num_stored_nonzeros)); + auto tmp = Csr::create(exec, source->get_size(), + num_stored_nonzeros, + result->get_strategy()); + exec->run(op(tmp.get(), source)); + tmp->move_to(result); + } + // If source is empty, there is no need to copy data or to call kernels + else { + auto tmp = + Csr::create(exec, result->get_strategy()); + tmp->move_to(result); + } } diff --git a/core/matrix/ell.cpp b/core/matrix/ell.cpp index a4b64f5a82c..1d4bed57288 100644 --- a/core/matrix/ell.cpp +++ b/core/matrix/ell.cpp @@ -134,10 +134,11 @@ void Ell::convert_to( size_type num_stored_elements = 0; exec->run(ell::make_count_nonzeros(this, &num_stored_elements)); - auto tmp = Csr::create(exec, this->get_size(), - num_stored_elements); + auto tmp = Csr::create( + exec, this->get_size(), num_stored_elements, result->get_strategy()); exec->run(ell::make_convert_to_csr(tmp.get(), this)); + tmp->make_srow(); tmp->move_to(result); } diff --git a/core/matrix/hybrid.cpp b/core/matrix/hybrid.cpp index cf956c9d0d8..40a50a377ea 100644 --- a/core/matrix/hybrid.cpp +++ b/core/matrix/hybrid.cpp @@ -135,10 +135,11 @@ void Hybrid::convert_to( size_type num_stored_elements = 0; exec->run(hybrid::make_count_nonzeros(this, &num_stored_elements)); - auto tmp = Csr::create(exec, this->get_size(), - num_stored_elements); + auto tmp = Csr::create( + exec, this->get_size(), num_stored_elements, result->get_strategy()); exec->run(hybrid::make_convert_to_csr(tmp.get(), this)); + tmp->make_srow(); tmp->move_to(result); } @@ -161,8 +162,9 @@ void Hybrid::read(const mat_data &data) get_each_row_nnz(data, row_nnz); strategy_->compute_hybrid_config(row_nnz, &ell_lim, &coo_lim); - auto tmp = Hybrid::create(this->get_executor()->get_master(), data.size, - ell_lim, data.size[0], coo_lim); + auto tmp = + Hybrid::create(this->get_executor()->get_master(), data.size, ell_lim, + data.size[0], coo_lim, this->get_strategy()); // Get values and column indexes. size_type ind = 0; diff --git a/core/matrix/sellp.cpp b/core/matrix/sellp.cpp index 67552874cf8..880678b3738 100644 --- a/core/matrix/sellp.cpp +++ b/core/matrix/sellp.cpp @@ -76,6 +76,12 @@ size_type calculate_total_cols(const matrix_data &data, IndexType current_slice = 0; size_type total_cols = 0; for (const auto &elem : data.nonzeros) { + if (elem.row != current_row) { + current_row = elem.row; + slice_lengths[current_slice] = + max(slice_lengths[current_slice], nonzeros_per_row); + nonzeros_per_row = 0; + } if (elem.row / slice_size != current_slice) { slice_lengths[current_slice] = stride_factor * @@ -83,12 +89,6 @@ size_type calculate_total_cols(const matrix_data &data, total_cols += slice_lengths[current_slice]; current_slice = elem.row / slice_size; } - if (elem.row != current_row) { - current_row = elem.row; - slice_lengths[current_slice] = - max(slice_lengths[current_slice], nonzeros_per_row); - nonzeros_per_row = 0; - } nonzeros_per_row += (elem.value != zero()); } slice_lengths[current_slice] = @@ -147,9 +147,10 @@ void Sellp::convert_to( size_type num_stored_nonzeros = 0; exec->run(sellp::make_count_nonzeros(this, &num_stored_nonzeros)); - auto tmp = Csr::create(exec, this->get_size(), - num_stored_nonzeros); + auto tmp = Csr::create( + exec, this->get_size(), num_stored_nonzeros, result->get_strategy()); exec->run(sellp::make_convert_to_csr(tmp.get(), this)); + tmp->make_srow(); tmp->move_to(result); } diff --git a/core/test/base/exception_helpers.cpp b/core/test/base/exception_helpers.cpp index a6ac533b545..dd013835300 100644 --- a/core/test/base/exception_helpers.cpp +++ b/core/test/base/exception_helpers.cpp @@ -55,7 +55,7 @@ TEST(NotCompiled, ThrowsWhenUsed) } -void does_not_support_int() { throw GKO_NOT_SUPPORTED(int); } +void does_not_support_int() { GKO_NOT_SUPPORTED(int); } TEST(NotSupported, ReturnsNotSupportedException) { @@ -131,6 +131,18 @@ TEST(AssertConformant, ThrowsWhenNotConformant) } +TEST(AssertEqual, DoesNotThrowWhenEqual) +{ + ASSERT_NO_THROW(GKO_ASSERT_EQ(1, 1)); +} + + +TEST(AssertEqual, ThrowsWhenNotEqual) +{ + ASSERT_THROW(GKO_ASSERT_EQ(0, 1), gko::ValueMismatch); +} + + TEST(AssertEqualRows, DoesNotThrowWhenEqualRowSize) { ASSERT_NO_THROW( diff --git a/core/test/matrix/csr.cpp b/core/test/matrix/csr.cpp index bd730edbea6..155f2c8ce21 100644 --- a/core/test/matrix/csr.cpp +++ b/core/test/matrix/csr.cpp @@ -127,11 +127,13 @@ TEST_F(Csr, CanBeCreatedFromExistingData) double values[] = {1.0, 2.0, 3.0, 4.0}; gko::int32 col_idxs[] = {0, 1, 1, 0}; gko::int32 row_ptrs[] = {0, 2, 3, 4}; + auto mtx = gko::matrix::Csr<>::create( exec, gko::dim<2>{3, 2}, gko::Array::view(exec, 4, values), gko::Array::view(exec, 4, col_idxs), gko::Array::view(exec, 4, row_ptrs), std::make_shared(2)); + ASSERT_EQ(mtx->get_num_srow_elements(), 1); ASSERT_EQ(mtx->get_const_values(), values); ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); @@ -183,6 +185,7 @@ TEST_F(Csr, CanBeCleared) TEST_F(Csr, CanBeReadFromMatrixData) { auto m = Mtx::create(exec, std::make_shared(2)); + m->read({{2, 3}, {{0, 0, 1.0}, {0, 1, 3.0}, diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 48561d475a8..d0b46c83c51 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -673,6 +673,8 @@ void spmv(std::shared_ptr exec, as_cuda_type(b->get_const_values()), as_cuda_type(b->get_stride()), as_cuda_type(c->get_values()), as_cuda_type(c->get_stride())); + } else { + GKO_NOT_SUPPORTED(nwarps); } } else if (a->get_strategy()->get_name() == "merge_path") { int items_per_thread = diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index cc82d30a240..76abffe5859 100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -85,10 +85,11 @@ constexpr double ratio = 1e-2; /** * A compile-time list of sub-warp sizes for which the spmv kernels should be * compiled. - * 0 is a special case where it uses a sub-warp size of 32 in + * 0 is a special case where it uses a sub-warp size of warp_size in * combination with atomic_adds. */ -using compiled_kernels = syn::value_list; +using compiled_kernels = + syn::value_list; namespace kernel { @@ -97,7 +98,7 @@ namespace { template -__device__ void spmv_kernel(const size_type num_rows, +__device__ void spmv_kernel(const size_type num_rows, const int nwarps_per_row, const ValueType *__restrict__ val, const IndexType *__restrict__ col, const size_type stride, @@ -108,9 +109,7 @@ __device__ void spmv_kernel(const size_type num_rows, { const auto tidx = static_cast(blockDim.x) * blockIdx.x + threadIdx.x; - const auto nwarps_per_row = - gridDim.x * blockDim.x / num_rows / subwarp_size; - const auto x = tidx / subwarp_size / nwarps_per_row; + const IndexType x = tidx / subwarp_size / nwarps_per_row; const auto warp_id = tidx / subwarp_size % nwarps_per_row; const auto y_start = tidx % subwarp_size + num_stored_elements_per_row * warp_id / nwarps_per_row; @@ -148,24 +147,26 @@ __device__ void spmv_kernel(const size_type num_rows, template __global__ __launch_bounds__(default_block_size) void spmv( - const size_type num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col, const size_type stride, - const size_type num_stored_elements_per_row, + const size_type num_rows, const int nwarps_per_row, + const ValueType *__restrict__ val, const IndexType *__restrict__ col, + const size_type stride, const size_type num_stored_elements_per_row, const ValueType *__restrict__ b, const size_type b_stride, ValueType *__restrict__ c, const size_type c_stride) { spmv_kernel( - num_rows, val, col, stride, num_stored_elements_per_row, b, b_stride, c, - c_stride, [](const ValueType &x, const ValueType &y) { return x; }); + num_rows, nwarps_per_row, val, col, stride, num_stored_elements_per_row, + b, b_stride, c, c_stride, + [](const ValueType &x, const ValueType &y) { return x; }); } template __global__ __launch_bounds__(default_block_size) void spmv( - const size_type num_rows, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, const IndexType *__restrict__ col, - const size_type stride, const size_type num_stored_elements_per_row, + const size_type num_rows, const int nwarps_per_row, + const ValueType *__restrict__ alpha, const ValueType *__restrict__ val, + const IndexType *__restrict__ col, const size_type stride, + const size_type num_stored_elements_per_row, const ValueType *__restrict__ b, const size_type b_stride, const ValueType *__restrict__ beta, ValueType *__restrict__ c, const size_type c_stride) @@ -178,15 +179,15 @@ __global__ __launch_bounds__(default_block_size) void spmv( // operation. if (atomic) { spmv_kernel( - num_rows, val, col, stride, num_stored_elements_per_row, b, - b_stride, c, c_stride, + num_rows, nwarps_per_row, val, col, stride, + num_stored_elements_per_row, b, b_stride, c, c_stride, [&alpha_val](const ValueType &x, const ValueType &y) { return alpha_val * x; }); } else { spmv_kernel( - num_rows, val, col, stride, num_stored_elements_per_row, b, - b_stride, c, c_stride, + num_rows, nwarps_per_row, val, col, stride, + num_stored_elements_per_row, b, b_stride, c, c_stride, [&alpha_val, &beta_val](const ValueType &x, const ValueType &y) { return alpha_val * x + beta_val * y; }); @@ -210,7 +211,7 @@ void abstract_spmv(syn::value_list, int nwarps_per_row, const matrix::Dense *beta = nullptr) { const auto nrows = a->get_size()[0]; - constexpr int subwarp_size = (info == 0) ? 32 : info; + constexpr int subwarp_size = (info == 0) ? cuda_config::warp_size : info; constexpr bool atomic = (info == 0); const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size( @@ -218,13 +219,14 @@ void abstract_spmv(syn::value_list, int nwarps_per_row, b->get_size()[1], 1); if (alpha == nullptr && beta == nullptr) { kernel::spmv<<>>( - nrows, as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), - a->get_stride(), a->get_num_stored_elements_per_row(), + nrows, nwarps_per_row, as_cuda_type(a->get_const_values()), + a->get_const_col_idxs(), a->get_stride(), + a->get_num_stored_elements_per_row(), as_cuda_type(b->get_const_values()), b->get_stride(), as_cuda_type(c->get_values()), c->get_stride()); } else if (alpha != nullptr && beta != nullptr) { kernel::spmv<<>>( - nrows, as_cuda_type(alpha->get_const_values()), + nrows, nwarps_per_row, as_cuda_type(alpha->get_const_values()), as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), a->get_stride(), a->get_num_stored_elements_per_row(), as_cuda_type(b->get_const_values()), b->get_stride(), @@ -255,16 +257,17 @@ std::array compute_subwarp_size_and_atomicity( // Use multithreads to perform the reduction on each row when the matrix is // wide. // To make every thread have computation, so pick the value which is the - // power of 2 less than 32 and is less than or equal to ell_ncols. If the - // subwarp_size is 32 and allow more than one warps to work on the same row, - // use atomic add to handle the warps write the value into the same - // position. The #warps is decided according to the number of warps allowed - // on GPU. + // power of 2 less than warp_size and is less than or equal to ell_ncols. If + // the subwarp_size is warp_size and allow more than one warps to work on + // the same row, use atomic add to handle the warps write the value into the + // same position. The #warps is decided according to the number of warps + // allowed on GPU. if (static_cast(ell_ncols) / nrows > ratio) { - while (subwarp_size < 32 && (subwarp_size << 1) <= ell_ncols) { + while (subwarp_size < cuda_config::warp_size && + (subwarp_size << 1) <= ell_ncols) { subwarp_size <<= 1; } - if (subwarp_size == 32) { + if (subwarp_size == cuda_config::warp_size) { nwarps_per_row = std::min(ell_ncols / cuda_config::warp_size, nwarps / nrows); nwarps_per_row = std::max(nwarps_per_row, 1); @@ -292,8 +295,8 @@ void spmv(std::shared_ptr exec, /** * info is the parameter for selecting the cuda kernel. - * for info == 0, it uses the kernel by 32 threads with atomic operation - * for other value, it uses the kernel without atomic_add + * for info == 0, it uses the kernel by warp_size threads with atomic + * operation for other value, it uses the kernel without atomic_add */ const int info = (!atomic) * subwarp_size; if (atomic) { @@ -323,8 +326,8 @@ void advanced_spmv(std::shared_ptr exec, /** * info is the parameter for selecting the cuda kernel. - * for info == 0, it uses the kernel by 32 threads with atomic operation - * for other value, it uses the kernel without atomic_add + * for info == 0, it uses the kernel by warp_size threads with atomic + * operation for other value, it uses the kernel without atomic_add */ const int info = (!atomic) * subwarp_size; if (atomic) { diff --git a/cuda/test/matrix/ell_kernels.cpp b/cuda/test/matrix/ell_kernels.cpp index 82e3d43d232..ff4ae0b8b88 100644 --- a/cuda/test/matrix/ell_kernels.cpp +++ b/cuda/test/matrix/ell_kernels.cpp @@ -82,8 +82,8 @@ class Ell : public ::testing::Test { } void set_up_apply_data(int num_rows = 532, int num_cols = 231, - int num_stored_elements_per_row = 0, int stride = 0, - int num_vectors = 1) + int num_vectors = 1, + int num_stored_elements_per_row = 0, int stride = 0) { mtx = Mtx::create(ref, gko::dim<2>{}, num_stored_elements_per_row, stride); @@ -148,7 +148,7 @@ TEST_F(Ell, AdvancedApplyIsEquivalentToRef) TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 300, 600); + set_up_apply_data(532, 231, 1, 300, 600); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -159,7 +159,7 @@ TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 300, 600); + set_up_apply_data(532, 231, 1, 300, 600); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -169,7 +169,7 @@ TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 300, 600, 3); + set_up_apply_data(532, 231, 3, 300, 600); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -180,7 +180,7 @@ TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 300, 600, 3); + set_up_apply_data(532, 231, 3, 300, 600); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -211,6 +211,72 @@ TEST_F(Ell, AdvancedByAtomicApplyIsEquivalentToRef) } +TEST_F(Ell, SimpleApplyByAtomicToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(10, 10000, 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, AdvancedByAtomicToDenseMatrixApplyIsEquivalentToRef) +{ + set_up_apply_data(10, 10000, 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, SimpleApplyOnSmallMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, AdvancedApplyOnSmallMatrixToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10, 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, SimpleApplyOnSmallMatrixToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10, 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, AdvancedApplyOnSmallMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Ell, ConvertToDenseIsEquivalentToRef) { set_up_apply_data(); diff --git a/dev_tools/scripts/create_new_algorithm.sh b/dev_tools/scripts/create_new_algorithm.sh index 819f27c60ea..24b37b475fe 100755 --- a/dev_tools/scripts/create_new_algorithm.sh +++ b/dev_tools/scripts/create_new_algorithm.sh @@ -22,7 +22,7 @@ function print_help { } function list_sources { - for type in solver preconditioner matrix + for type in solver preconditioner matrix factorization do for i in $(ls $GINKGO_ROOT_DIR/core/$type/*.cpp) do @@ -74,7 +74,7 @@ name=${name,,} Name=${name^} NAME=${name^^} -if [[ "$name" == "" ]] || ( [[ "$source_type" != "preconditioner" ]] && [[ "$source_type" != "matrix" ]] && [[ "$source_type" != "solver" ]] ) || [[ "$source_name" == "" ]]; then +if [[ "$name" == "" ]] || ( [[ "$source_type" != "preconditioner" ]] && [[ "$source_type" != "matrix" ]] && [[ "$source_type" != "solver" ]] && [[ "$source_type" != "factorization" ]] ) || [[ "$source_name" == "" ]]; then print_help exit 1 fi diff --git a/dev_tools/scripts/update_ginkgo_header.sh b/dev_tools/scripts/update_ginkgo_header.sh index d986887aba4..df78ba7794e 100755 --- a/dev_tools/scripts/update_ginkgo_header.sh +++ b/dev_tools/scripts/update_ginkgo_header.sh @@ -1,7 +1,17 @@ #!/usr/bin/env bash +# Note: This script is supposed to support the developer and not to hinder +# the development process by setting more restrictions. +# This is the reason why every exit code is 0, otherwise, the whole +# `make` procedure would fail. + PLACE_HOLDER="#PUBLIC_HEADER_PLACE_HOLDER" +WARNING_PREFIX="[WARNING] ginkgo.hpp update script failed because:" + +RM_PARAMETER="-f" + + THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) INCLUDE_DIR="${THIS_DIR}/../../include" @@ -17,28 +27,29 @@ HEADER_LIST="global_includes.hpp.tmp" # Test if required commands are present on the system: command -v find &> /dev/null if [ ${?} -ne 0 ]; then - echo 'The command `find` is required for this script to work, but not supported by your system.' 1>&2 - exit 1 + echo "${WARNING_PREFIX} "'The command `find` is required for this script to work, but not supported by your system.' 1>&2 + exit 0 fi command -v sort &> /dev/null if [ ${?} -ne 0 ]; then - echo 'The command `sort` is required for this script to work, but not supported by your system.' 1>&2 - exit 1 + echo "${WARNING_PREFIX} "'The command `sort` is required for this script to work, but not supported by your system.' 1>&2 + exit 0 fi command -v cmp &> /dev/null if [ ${?} -ne 0 ]; then - echo 'The command `cmp` is required for this script to work, but not supported by your system.' 1>&2 - exit 1 + echo "${WARNING_PREFIX} "'The command `cmp` is required for this script to work, but not supported by your system.' 1>&2 + exit 0 fi + # Put all header files as a list (separated by newlines) in the file ${HEADER_LIST} # Requires detected files (including the path) to not contain newlines find "${TOP_HEADER_FOLDER}" -name '*.hpp' -type f -print > "${HEADER_LIST}" if [ ${?} -ne 0 ]; then - echo 'Exiting due to an error being returned by `find`!' 1>&2 - rm "${HEADER_LIST}" - exit 1 + echo "${WARNING_PREFIX} "'The `find` command returned with an error!' 1>&2 + rm "${RM_PARAMETER}" "${HEADER_LIST}" + exit 0 fi # It must be a POSIX locale in order to sort according to ASCII @@ -47,14 +58,20 @@ export LC_ALL=C sort -o "${HEADER_LIST}" "${HEADER_LIST}" if [ ${?} -ne 0 ]; then - echo 'Exiting due to an error being returned by `sort`!' 1>&2 - rm "${HEADER_LIST}" - exit 1 + echo "${WARNING_PREFIX} "'The `sort` command returned with an error!' 1>&2 + rm "${RM_PARAMETER}" "${HEADER_LIST}" + exit 0 +fi + +if [ ! -r "${GINKGO_HEADER_TEMPLATE_FILE}" ]; then + echo "${WARNING_PREFIX} The file '${GINKGO_HEADER_TEMPLATE_FILE}' can not be read!" 1>&2 + rm "${RM_PARAMETER}" "${HEADER_LIST}" + exit 0 fi # Detect the end of line type (CRLF/LF) by ${GINKGO_HEADER_TEMPLATE_FILE} END=""; -if [[ "$(file ${GINKGO_HEADER_TEMPLATE_FILE})" == *"CRLF"* ]]; then +if [[ "$(file "${GINKGO_HEADER_TEMPLATE_FILE}")" == *"CRLF"* ]]; then END="\r" fi @@ -64,6 +81,18 @@ fi # (e.g. benchmarks and examples) GINKGO_HEADER_TMP="${GINKGO_HEADER_FILE}.tmp" +# See if we have write permissions to ${GINKGO_HEADER_TMP} +echo "Test for write permissions" > "${GINKGO_HEADER_TMP}" +if [ ${?} -ne 0 ]; then + echo "${WARNING_PREFIX} No write permissions for temporary file '${GINKGO_HEADER_TMP}'!" 1>&2 + rm "${RM_PARAMETER}" "${HEADER_LIST}" + exit 0 +fi +# Remove file again, so the test does not corrupt the result +rm "${RM_PARAMETER}" "${GINKGO_HEADER_TMP}" + + + PREVIOUS_FOLDER="" # "IFS=''" sets the word delimiters for read. # An empty ${IFS} means the given name (after `read`) will be set to the whole line, @@ -101,14 +130,19 @@ while IFS='' read -r line; do fi done < "${GINKGO_HEADER_TEMPLATE_FILE}" +rm "${RM_PARAMETER}" "${HEADER_LIST}" + # Use the generated file ONLY when the public header does not exist yet # or the generated one is different to the existing one if [ ! -f "${GINKGO_HEADER_FILE}" ] || \ ! cmp -s "${GINKGO_HEADER_TMP}" "${GINKGO_HEADER_FILE}" then mv "${GINKGO_HEADER_TMP}" "${GINKGO_HEADER_FILE}" + if [ ${?} -ne 0 ]; then + echo "${WARNING_PREFIX} No permission to replace the header '${GINKGO_HEADER_FILE}'!" 1>&2 + rm "${RM_PARAMETER}" "${GINKGO_HEADER_TMP}" + exit 0 + fi else - rm "${GINKGO_HEADER_TMP}" + rm "${RM_PARAMETER}" "${GINKGO_HEADER_TMP}" fi - -rm "${HEADER_LIST}" diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml index d873dd9ee2d..4c25a288a38 100644 --- a/doc/DoxygenLayout.xml +++ b/doc/DoxygenLayout.xml @@ -3,7 +3,7 @@ - + diff --git a/doc/headers/jacobi.hpp b/doc/headers/jacobi.hpp new file mode 100644 index 00000000000..8e406d75fea --- /dev/null +++ b/doc/headers/jacobi.hpp @@ -0,0 +1,40 @@ +/************************************************************* +Copyright (c) 2017-2019, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +/** + * @defgroup jacobi Jacobi Preconditioner + * + * @brief A module dedicated to the implementation and usage of the + * Jacobi Preconditioner in Ginkgo. + * + * @ingroup precond + */ diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index a2accaded3e..6600e7291ba 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -/**************************************************************** +/**************************************************************** This example solves a 2D Poisson equation: \Omega = (0,1)^2 @@ -67,7 +67,7 @@ additional parameters. The intention of this is to show how generation of stencil values and the right hand side vector changes when increasing the dimension. -***************************************************************/ +***************************************************************/ #include #include diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 7da78bd0f0d..339f4239519 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -/**************************************************************** +/**************************************************************** This example solves a 1D Poisson equation: u : [0, 1] -> R @@ -67,7 +67,7 @@ existing software - the `generate_stencil_matrix`, `generate_rhs`, all (i.e. they could have been there before the application developer decided to use Ginkgo, and the only part where Ginkgo is introduced is inside the `solve_system` function. -***************************************************************/ +***************************************************************/ #include #include diff --git a/examples/twentyseven-pt-stencil-solver/twentyseven-pt-stencil-solver.cpp b/examples/twentyseven-pt-stencil-solver/twentyseven-pt-stencil-solver.cpp index 78e31e4a8e9..f319ed35513 100644 --- a/examples/twentyseven-pt-stencil-solver/twentyseven-pt-stencil-solver.cpp +++ b/examples/twentyseven-pt-stencil-solver/twentyseven-pt-stencil-solver.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -/**************************************************************** +/**************************************************************** This example solves a 3D Poisson equation: \Omega = (0,1)^3 @@ -68,7 +68,7 @@ changed when passing additional parameters. The intention of this is to show how generation of stencil values and the right hand side vector changes when increasing the dimension. -***************************************************************/ +***************************************************************/ #include #include diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 31ec295d395..aeb8cb30712 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_VERSION_MINOR @Ginkgo_VERSION_MINOR@ #define GKO_VERSION_PATCH @Ginkgo_VERSION_PATCH@ #define GKO_VERSION_TAG "@Ginkgo_VERSION_TAG@" +#define GKO_VERSION_STR @Ginkgo_VERSION_MAJOR@, @Ginkgo_VERSION_MINOR@, @Ginkgo_VERSION_PATCH@ // clang-format on /* diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 3a58e253595..1855a9dc5c7 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -305,6 +305,31 @@ class BadDimension : public Error { }; +/** + * ValueMismatch is thrown if two values are not equal. + */ +class ValueMismatch : public Error { +public: + /** + * Initializes a value mismatch error. + * + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The function name where the error occurred + * @param val1 The first value to be compared. + * @param val2 The second value to be compared. + * @param clarification An additional message further describing the error + */ + ValueMismatch(const std::string &file, int line, const std::string &func, + size_type val1, size_type val2, + const std::string &clarification) + : Error(file, line, + func + ": Value mismatch : " + std::to_string(val1) + " and " + + std::to_string(val2) + " : " + clarification) + {} +}; + + /** * AllocationError is thrown if a memory allocation fails. */ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 778b496bccf..41225f06c98 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -89,17 +89,21 @@ namespace gko { /** - * Creates a NotSupported exception. + * Throws a NotSupported exception. * This macro sets the correct information about the location of the error - * and fills the exception with data about _obj. + * and fills the exception with data about _obj, followed by throwing it. * * @param _obj the object referenced by NotSupported exception - * - * @return NotSupported */ -#define GKO_NOT_SUPPORTED(_obj) \ - ::gko::NotSupported(__FILE__, __LINE__, __func__, \ - ::gko::name_demangling::get_type_name(typeid(_obj))) +#define GKO_NOT_SUPPORTED(_obj) \ + { \ + throw ::gko::NotSupported( \ + __FILE__, __LINE__, __func__, \ + ::gko::name_demangling::get_type_name(typeid(_obj))); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") namespace detail { @@ -117,6 +121,18 @@ inline dim<2> get_size(const dim<2> &size) { return size; } } // namespace detail +/** + *Asserts that _val1 and _val2 are equal. + * + *@throw ValueMisatch if _val1 is different from _val2. + */ +#define GKO_ASSERT_EQ(_val1, _val2) \ + if (_val1 != _val2) { \ + throw ::gko::ValueMismatch(__FILE__, __LINE__, __func__, _val1, _val2, \ + "expected equal values"); \ + } + + /** *Asserts that _op1 is a square matrix. * diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 5561d4064b2..115978f9b18 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -897,7 +897,7 @@ class CudaExecutor : public detail::ExecutorBase, major_(0), minor_(0) { - assert(device_id < max_devices); + assert(device_id < max_devices && device_id >= 0); this->set_gpu_property(); this->init_handles(); increase_num_execs(device_id); @@ -909,19 +909,19 @@ class CudaExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); - static void increase_num_execs(int device_id) + static void increase_num_execs(unsigned device_id) { std::lock_guard guard(mutex[device_id]); num_execs[device_id]++; } - static void decrease_num_execs(int device_id) + static void decrease_num_execs(unsigned device_id) { std::lock_guard guard(mutex[device_id]); num_execs[device_id]--; } - static int get_num_execs(int device_id) + static unsigned get_num_execs(unsigned device_id) { std::lock_guard guard(mutex[device_id]); return num_execs[device_id]; @@ -941,7 +941,7 @@ class CudaExecutor : public detail::ExecutorBase, handle_manager cusparse_handle_; static constexpr int max_devices = 64; - static int num_execs[max_devices]; + static unsigned num_execs[max_devices]; static std::mutex mutex[max_devices]; }; diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index a75e1dff392..918764fc615 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -220,6 +220,12 @@ class PolymorphicObject : public log::EnableLogging { : exec_{std::move(exec)} {} + // preserve the executor of the object + explicit PolymorphicObject(const PolymorphicObject &other) + { + *this = other; + } + /** * Implementers of PolymorphicObject should override this function instead * of create_default(). diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index f069cf7b0e6..e238f276fab 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -305,6 +305,11 @@ class range { */ static constexpr size_type dimensionality = accessor::dimensionality; + /** + * Use the default destructor. + */ + ~range() = default; + /** * Creates a new range. * @@ -377,6 +382,8 @@ class range { return *this; } + GKO_ATTRIBUTES range(const range &other) = default; + /** * Returns the length of the specified dimension of the range. * diff --git a/include/ginkgo/core/factorization/par_ilu.hpp b/include/ginkgo/core/factorization/par_ilu.hpp index 7de220aec66..8e42b4dc5a9 100644 --- a/include/ginkgo/core/factorization/par_ilu.hpp +++ b/include/ginkgo/core/factorization/par_ilu.hpp @@ -89,7 +89,7 @@ namespace factorization { * @tparam IndexType Type of the indices of all matrices used in this class * * @ingroup factor - * @ingroup linop + * @ingroup LinOp */ template class ParIlu : public Composition { diff --git a/include/ginkgo/core/matrix/coo.hpp b/include/ginkgo/core/matrix/coo.hpp index ac6f271a153..36468411a8a 100644 --- a/include/ginkgo/core/matrix/coo.hpp +++ b/include/ginkgo/core/matrix/coo.hpp @@ -278,10 +278,8 @@ class Coo : public EnableLinOp>, col_idxs_{exec, std::forward(col_idxs)}, row_idxs_{exec, std::forward(row_idxs)} { - GKO_ENSURE_IN_BOUNDS(values_.get_num_elems() - 1, - col_idxs_.get_num_elems()); - GKO_ENSURE_IN_BOUNDS(values_.get_num_elems() - 1, - row_idxs_.get_num_elems()); + GKO_ASSERT_EQ(values_.get_num_elems(), col_idxs_.get_num_elems()); + GKO_ASSERT_EQ(values_.get_num_elems(), row_idxs_.get_num_elems()); } void apply_impl(const LinOp *b, LinOp *x) const override; diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 391308ffd1b..4dd2e5d862c 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -183,18 +183,33 @@ class Csr : public EnableLinOp>, auto nwarps = mtx_srow->get_num_elems(); if (nwarps > 0) { - auto exec = mtx_srow->get_executor()->get_master(); - Array srow_host(exec); - srow_host = *mtx_srow; - auto srow = srow_host.get_data(); - Array row_ptrs_host(exec); - row_ptrs_host = mtx_row_ptrs; - auto row_ptrs = row_ptrs_host.get_const_data(); + auto host_srow_exec = mtx_srow->get_executor()->get_master(); + auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); + const bool is_srow_on_host{host_srow_exec == + mtx_srow->get_executor()}; + const bool is_mtx_on_host{host_mtx_exec == + mtx_row_ptrs.get_executor()}; + Array row_ptrs_host(host_mtx_exec); + Array srow_host(host_srow_exec); + const index_type *row_ptrs{}; + index_type *srow{}; + if (is_srow_on_host) { + srow = mtx_srow->get_data(); + } else { + srow_host = *mtx_srow; + srow = srow_host.get_data(); + } + if (is_mtx_on_host) { + row_ptrs = mtx_row_ptrs.get_const_data(); + } else { + row_ptrs_host = mtx_row_ptrs; + row_ptrs = row_ptrs_host.get_const_data(); + } for (size_type i = 0; i < nwarps; i++) { srow[i] = 0; } - auto num_rows = mtx_row_ptrs.get_num_elems() - 1; - auto num_elems = row_ptrs[num_rows]; + const auto num_rows = mtx_row_ptrs.get_num_elems() - 1; + const auto num_elems = row_ptrs[num_rows]; for (size_type i = 0; i < num_rows; i++) { auto bucket = ceildiv((ceildiv(row_ptrs[i + 1], warp_size) * nwarps), @@ -207,7 +222,9 @@ class Csr : public EnableLinOp>, for (size_type i = 1; i < nwarps; i++) { srow[i] += srow[i - 1]; } - *mtx_srow = srow_host; + if (!is_srow_on_host) { + *mtx_srow = srow_host; + } } } @@ -249,28 +266,47 @@ class Csr : public EnableLinOp>, // if the number of stored elements is larger than 1e6 or // the maximum number of stored elements per row is larger than // 64, use load_balance otherwise use classical + auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); + const bool is_mtx_on_host{host_mtx_exec == + mtx_row_ptrs.get_executor()}; + Array row_ptrs_host(host_mtx_exec); + const index_type *row_ptrs{}; + if (is_mtx_on_host) { + row_ptrs = mtx_row_ptrs.get_const_data(); + } else { + row_ptrs_host = mtx_row_ptrs; + row_ptrs = row_ptrs_host.get_const_data(); + } const auto num_rows = mtx_row_ptrs.get_num_elems() - 1; - Array host_row_ptrs( - mtx_row_ptrs.get_executor()->get_master()); - host_row_ptrs = mtx_row_ptrs; - const auto row_val = host_row_ptrs.get_const_data(); - if (row_val[num_rows] > static_cast(1e6)) { - std::make_shared(nwarps_)->process(host_row_ptrs, - mtx_srow); - this->set_name("load_balance"); + if (row_ptrs[num_rows] > index_type(1e6)) { + load_balance actual_strategy(nwarps_); + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + } + this->set_name(actual_strategy.get_name()); } else { index_type maxnum = 0; for (index_type i = 1; i < num_rows + 1; i++) { - maxnum = max(maxnum, row_val[i] - row_val[i - 1]); + maxnum = max(maxnum, row_ptrs[i] - row_ptrs[i - 1]); } if (maxnum > 64) { - std::make_shared(nwarps_)->process( - host_row_ptrs, mtx_srow); - this->set_name("load_balance"); + load_balance actual_strategy(nwarps_); + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + } + this->set_name(actual_strategy.get_name()); } else { - std::make_shared()->process(host_row_ptrs, - mtx_srow); - this->set_name("classical"); + classical actual_strategy; + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + } + this->set_name(actual_strategy.get_name()); } } } @@ -498,10 +534,8 @@ class Csr : public EnableLinOp>, srow_(exec), strategy_(std::move(strategy)) { - GKO_ENSURE_IN_BOUNDS(values_.get_num_elems() - 1, - col_idxs_.get_num_elems()); - GKO_ENSURE_IN_BOUNDS(this->get_size()[0], row_ptrs_.get_num_elems()); - srow_.resize_and_reset(strategy_->clac_size(values_.get_num_elems())); + GKO_ASSERT_EQ(values_.get_num_elems(), col_idxs_.get_num_elems()); + GKO_ASSERT_EQ(this->get_size()[0] + 1, row_ptrs_.get_num_elems()); this->make_srow(); } @@ -511,9 +545,13 @@ class Csr : public EnableLinOp>, LinOp *x) const override; /** - * Compute srow, it should be run after setting value. + * Computes srow. It should be run after changing any row_ptrs_ value. */ - void make_srow() { strategy_->process(row_ptrs_, &srow_); } + void make_srow() + { + srow_.resize_and_reset(strategy_->clac_size(values_.get_num_elems())); + strategy_->process(row_ptrs_, &srow_); + } private: Array values_; diff --git a/include/ginkgo/core/matrix/ell.hpp b/include/ginkgo/core/matrix/ell.hpp index cb7fad12d49..d9572ec0759 100644 --- a/include/ginkgo/core/matrix/ell.hpp +++ b/include/ginkgo/core/matrix/ell.hpp @@ -287,10 +287,10 @@ class Ell : public EnableLinOp>, num_stored_elements_per_row_{num_stored_elements_per_row}, stride_{stride} { - GKO_ENSURE_IN_BOUNDS(num_stored_elements_per_row_ * stride_ - 1, - values_.get_num_elems()); - GKO_ENSURE_IN_BOUNDS(num_stored_elements_per_row_ * stride_ - 1, - col_idxs_.get_num_elems()); + GKO_ASSERT_EQ(num_stored_elements_per_row_ * stride_, + values_.get_num_elems()); + GKO_ASSERT_EQ(num_stored_elements_per_row_ * stride_, + col_idxs_.get_num_elems()); } void apply_impl(const LinOp *b, LinOp *x) const override; diff --git a/include/ginkgo/core/matrix/sparsity_csr.hpp b/include/ginkgo/core/matrix/sparsity_csr.hpp index 1e43ec0930f..5d1b0e580a8 100644 --- a/include/ginkgo/core/matrix/sparsity_csr.hpp +++ b/include/ginkgo/core/matrix/sparsity_csr.hpp @@ -240,9 +240,7 @@ class SparsityCsr auto tmp = Array{exec->get_master(), 1}; tmp.get_data()[0] = value; value_ = Array{exec, std::move(tmp)}; - GKO_ENSURE_IN_BOUNDS(col_idxs_.get_num_elems() - 1, - col_idxs_.get_num_elems()); - GKO_ENSURE_IN_BOUNDS(this->get_size()[0], row_ptrs_.get_num_elems()); + GKO_ASSERT_EQ(this->get_size()[0] + 1, row_ptrs_.get_num_elems()); } /** diff --git a/include/ginkgo/core/preconditioner/ilu.hpp b/include/ginkgo/core/preconditioner/ilu.hpp index e1ee08879e9..6b41cc176da 100644 --- a/include/ginkgo/core/preconditioner/ilu.hpp +++ b/include/ginkgo/core/preconditioner/ilu.hpp @@ -214,7 +214,7 @@ class Ilu : public EnableLinOp> { l_factor = comp_cast->get_operators()[0]; u_factor = comp_cast->get_operators()[1]; } else { - throw GKO_NOT_SUPPORTED(comp_cast); + GKO_NOT_SUPPORTED(comp_cast); } GKO_ASSERT_EQUAL_DIMENSIONS(l_factor, u_factor); @@ -276,7 +276,8 @@ class Ilu : public EnableLinOp> { * */ template - struct has_with_criteria : std::false_type {}; + struct has_with_criteria : std::false_type { + }; /** * @copydoc has_with_criteria @@ -290,7 +291,8 @@ class Ilu : public EnableLinOp> { SolverType, xstd::void_t>() .with_criteria(with_criteria_param_type()))>> - : std::true_type {}; + : std::true_type { + }; /** diff --git a/include/ginkgo/core/preconditioner/jacobi.hpp b/include/ginkgo/core/preconditioner/jacobi.hpp index 92b77a6c085..1c70ee0c498 100644 --- a/include/ginkgo/core/preconditioner/jacobi.hpp +++ b/include/ginkgo/core/preconditioner/jacobi.hpp @@ -56,8 +56,6 @@ namespace preconditioner { * @tparam IndexType type used for storing indices of the matrix * * @ingroup jacobi - * @ingroup precond - * @ingroup LinOp */ template struct block_interleaved_storage_scheme { @@ -489,8 +487,7 @@ class Jacobi : public EnableLinOp>, parameters_.block_pointers.get_num_elems() - 1)), conditioning_(factory->get_executor()) { - if (parameters_.max_block_size >= 32 || - parameters_.max_block_size < 1) { + if (parameters_.max_block_size > 32 || parameters_.max_block_size < 1) { GKO_NOT_SUPPORTED(this); } parameters_.block_pointers.set_executor(this->get_executor()); diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index 123a4182347..72173c6e49d 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -89,6 +89,9 @@ namespace solver { * its eigenvalues `lambda` have to satisfy the equation `|lambda - 1| < 1). * * @tparam ValueType precision of matrix elements + * + * @ingroup solvers + * @ingroup LinOp */ template class Ir : public EnableLinOp> { diff --git a/omp/preconditioner/jacobi_kernels.cpp b/omp/preconditioner/jacobi_kernels.cpp index dce62ae905a..1d04410a47b 100644 --- a/omp/preconditioner/jacobi_kernels.cpp +++ b/omp/preconditioner/jacobi_kernels.cpp @@ -387,7 +387,7 @@ void generate(std::shared_ptr exec, block[b].get_const_data(), block_size); } const auto local_prec = prec ? prec[g + b] : precision_reduction(); - if (local_prec == precision_reduction::autodetect()) { + if (local_prec == precision_reduction::autodetect() && cond) { using preconditioner::detail::get_supported_storage_reductions; pr_descriptors[b] = get_supported_storage_reductions( accuracy, cond[g + b], diff --git a/reference/preconditioner/jacobi_kernels.cpp b/reference/preconditioner/jacobi_kernels.cpp index 141c981c30a..f2972965273 100644 --- a/reference/preconditioner/jacobi_kernels.cpp +++ b/reference/preconditioner/jacobi_kernels.cpp @@ -369,7 +369,7 @@ void generate(std::shared_ptr exec, block[b].get_const_data(), block_size); } const auto local_prec = prec ? prec[g + b] : precision_reduction(); - if (local_prec == precision_reduction::autodetect()) { + if (local_prec == precision_reduction::autodetect() && cond) { using preconditioner::detail::get_supported_storage_reductions; pr_descriptors[b] = get_supported_storage_reductions( accuracy, cond[g + b], diff --git a/reference/test/matrix/coo_kernels.cpp b/reference/test/matrix/coo_kernels.cpp index a98ce5adf78..0c0889df6ed 100644 --- a/reference/test/matrix/coo_kernels.cpp +++ b/reference/test/matrix/coo_kernels.cpp @@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/coo_kernels.hpp" +#include + + #include @@ -90,21 +93,40 @@ class Coo : public ::testing::Test { TEST_F(Coo, ConvertsToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx->get_executor()); - - mtx->convert_to(csr_mtx.get()); - - assert_equal_to_mtx_in_csr_format(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx->get_executor(), csr_s_merge); + + mtx->convert_to(csr_mtx_c.get()); + mtx->convert_to(csr_mtx_m.get()); + + assert_equal_to_mtx_in_csr_format(csr_mtx_c.get()); + assert_equal_to_mtx_in_csr_format(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } TEST_F(Coo, MovesToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx->get_executor()); - - mtx->move_to(csr_mtx.get()); - - assert_equal_to_mtx_in_csr_format(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx->get_executor(), csr_s_merge); + auto mtx_clone = mtx->clone(); + + mtx->move_to(csr_mtx_c.get()); + mtx_clone->move_to(csr_mtx_m.get()); + + assert_equal_to_mtx_in_csr_format(csr_mtx_c.get()); + assert_equal_to_mtx_in_csr_format(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index fc30c4a89df..ce24a8ef42a 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -331,17 +332,49 @@ TEST_F(Dense, MovesToCoo) } -TEST_F(Dense, ConvertsToCsr) +TEST_F(Dense, ConvertsEmptyMatrixToCsr) +{ + auto strategy = std::make_shared::load_balance>(0); + auto from_mtx = gko::matrix::Dense<>::create(exec, gko::dim<2>{0, 0}); + auto to_mtx = + gko::matrix::Csr<>::create(exec, gko::dim<2>{0, 0}, 0, strategy); + + from_mtx->convert_to(to_mtx.get()); + + ASSERT_FALSE(to_mtx->get_size()); +} + + +TEST_F(Dense, MovesEmptyMatrixToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx4->get_executor()); + auto strategy = std::make_shared::load_balance>(0); + auto from_mtx = gko::matrix::Dense<>::create(exec, gko::dim<2>{0, 0}); + auto to_mtx = + gko::matrix::Csr<>::create(exec, gko::dim<2>{0, 0}, 0, strategy); - mtx4->convert_to(csr_mtx.get()); - auto v = csr_mtx->get_const_values(); - auto c = csr_mtx->get_const_col_idxs(); - auto r = csr_mtx->get_const_row_ptrs(); + from_mtx->move_to(to_mtx.get()); - ASSERT_EQ(csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx->get_num_stored_elements(), 4); + ASSERT_FALSE(to_mtx->get_size()); +} + + +TEST_F(Dense, ConvertsToCsr) +{ + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx4->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx4->get_executor(), csr_s_merge); + + mtx4->convert_to(csr_mtx_c.get()); + mtx4->convert_to(csr_mtx_m.get()); + + auto v = csr_mtx_c->get_const_values(); + auto c = csr_mtx_c->get_const_col_idxs(); + auto r = csr_mtx_c->get_const_row_ptrs(); + ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); EXPECT_EQ(r[0], 0); EXPECT_EQ(r[1], 3); EXPECT_EQ(r[2], 4); @@ -353,20 +386,30 @@ TEST_F(Dense, ConvertsToCsr) EXPECT_EQ(v[1], 3.0); EXPECT_EQ(v[2], 2.0); EXPECT_EQ(v[3], 5.0); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + GKO_ASSERT_MTX_NEAR(csr_mtx_c.get(), csr_mtx_m.get(), 0.0); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } TEST_F(Dense, MovesToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx4->get_executor()); - - mtx4->move_to(csr_mtx.get()); - auto v = csr_mtx->get_const_values(); - auto c = csr_mtx->get_const_col_idxs(); - auto r = csr_mtx->get_const_row_ptrs(); - - ASSERT_EQ(csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx->get_num_stored_elements(), 4); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx4->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx4->get_executor(), csr_s_merge); + auto mtx_clone = mtx4->clone(); + + mtx4->move_to(csr_mtx_c.get()); + mtx_clone->move_to(csr_mtx_m.get()); + + auto v = csr_mtx_c->get_const_values(); + auto c = csr_mtx_c->get_const_col_idxs(); + auto r = csr_mtx_c->get_const_row_ptrs(); + ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); EXPECT_EQ(r[0], 0); EXPECT_EQ(r[1], 3); EXPECT_EQ(r[2], 4); @@ -378,6 +421,9 @@ TEST_F(Dense, MovesToCsr) EXPECT_EQ(v[1], 3.0); EXPECT_EQ(v[2], 2.0); EXPECT_EQ(v[3], 5.0); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + GKO_ASSERT_MTX_NEAR(csr_mtx_c.get(), csr_mtx_m.get(), 0.0); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } diff --git a/reference/test/matrix/ell_kernels.cpp b/reference/test/matrix/ell_kernels.cpp index 4dc96332eb8..806949caf9b 100644 --- a/reference/test/matrix/ell_kernels.cpp +++ b/reference/test/matrix/ell_kernels.cpp @@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include @@ -347,21 +350,40 @@ TEST_F(Ell, MovesWithStrideToDense) TEST_F(Ell, ConvertsToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx1->get_executor()); - - mtx1->convert_to(csr_mtx.get()); - - assert_equal_to_mtx(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_merge); + + mtx1->convert_to(csr_mtx_c.get()); + mtx1->convert_to(csr_mtx_m.get()); + + assert_equal_to_mtx(csr_mtx_c.get()); + assert_equal_to_mtx(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } TEST_F(Ell, ConvertsWithStrideToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx2->get_executor()); - - mtx2->convert_to(csr_mtx.get()); - - assert_equal_to_mtx(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx2->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx2->get_executor(), csr_s_merge); + auto mtx_clone = mtx2->clone(); + + mtx2->move_to(csr_mtx_c.get()); + mtx_clone->move_to(csr_mtx_m.get()); + + assert_equal_to_mtx(csr_mtx_c.get()); + assert_equal_to_mtx(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } diff --git a/reference/test/matrix/hybrid_kernels.cpp b/reference/test/matrix/hybrid_kernels.cpp index c2512c5afd9..1ca6b61cd20 100644 --- a/reference/test/matrix/hybrid_kernels.cpp +++ b/reference/test/matrix/hybrid_kernels.cpp @@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/hybrid_kernels.hpp" +#include + + #include @@ -244,21 +247,40 @@ TEST_F(Hybrid, MovesToDense) TEST_F(Hybrid, ConvertsToCsr) { - auto csr_mtx = Csr::create(mtx1->get_executor()); - - mtx1->convert_to(csr_mtx.get()); - - assert_equal_to_mtx(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_merge); + + mtx1->convert_to(csr_mtx_c.get()); + mtx1->convert_to(csr_mtx_m.get()); + + assert_equal_to_mtx(csr_mtx_c.get()); + assert_equal_to_mtx(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } TEST_F(Hybrid, MovesToCsr) { - auto csr_mtx = Csr::create(mtx1->get_executor()); - - mtx1->move_to(csr_mtx.get()); - - assert_equal_to_mtx(csr_mtx.get()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_merge); + auto mtx_clone = mtx1->clone(); + + mtx1->move_to(csr_mtx_c.get()); + mtx_clone->move_to(csr_mtx_m.get()); + + assert_equal_to_mtx(csr_mtx_c.get()); + assert_equal_to_mtx(csr_mtx_m.get()); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } diff --git a/reference/test/matrix/sellp_kernels.cpp b/reference/test/matrix/sellp_kernels.cpp index 2ae24e260b9..102e218dbb4 100644 --- a/reference/test/matrix/sellp_kernels.cpp +++ b/reference/test/matrix/sellp_kernels.cpp @@ -200,29 +200,48 @@ TEST_F(Sellp, MovesToDense) TEST_F(Sellp, ConvertsToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx1->get_executor()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_merge); - mtx1->convert_to(csr_mtx.get()); + mtx1->convert_to(csr_mtx_c.get()); + mtx1->convert_to(csr_mtx_m.get()); // clang-format off - GKO_ASSERT_MTX_NEAR(csr_mtx, - l({{1.0, 3.0, 2.0}, - {0.0, 5.0, 0.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(csr_mtx_c, + l({{1.0, 3.0, 2.0}, + {0.0, 5.0, 0.0}}), 0.0); // clang-format on + GKO_ASSERT_MTX_NEAR(csr_mtx_c.get(), csr_mtx_m.get(), 0.0); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } TEST_F(Sellp, MovesToCsr) { - auto csr_mtx = gko::matrix::Csr<>::create(mtx1->get_executor()); + auto csr_s_classical = std::make_shared::classical>(); + auto csr_s_merge = std::make_shared::merge_path>(); + auto csr_mtx_c = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_classical); + auto csr_mtx_m = + gko::matrix::Csr<>::create(mtx1->get_executor(), csr_s_merge); + auto mtx_clone = mtx1->clone(); - mtx1->move_to(csr_mtx.get()); + mtx1->move_to(csr_mtx_c.get()); + mtx_clone->move_to(csr_mtx_m.get()); // clang-format off - GKO_ASSERT_MTX_NEAR(csr_mtx, - l({{1.0, 3.0, 2.0}, - {0.0, 5.0, 0.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(csr_mtx_c, + l({{1.0, 3.0, 2.0}, + {0.0, 5.0, 0.0}}), 0.0); // clang-format on + GKO_ASSERT_MTX_NEAR(csr_mtx_c.get(), csr_mtx_m.get(), 0.0); + ASSERT_EQ(csr_mtx_c->get_strategy(), csr_s_classical); + ASSERT_EQ(csr_mtx_m->get_strategy(), csr_s_merge); } diff --git a/test_install/CMakeLists.txt b/test_install/CMakeLists.txt index e3f9eba73d0..7eef8b8b7dd 100644 --- a/test_install/CMakeLists.txt +++ b/test_install/CMakeLists.txt @@ -7,6 +7,9 @@ find_package(Ginkgo REQUIRED # Alternatively, use `cmake -DCMAKE_PREFIX_PATH=` to specify the install directory ) +if(GINKGO_HAVE_PAPI_SDE) + find_package(PAPI REQUIRED OPTIONAL_COMPONENTS sde) +endif() # Needed because of a known issue with CUDA while linking statically. # For details, see https://gitlab.kitware.com/cmake/cmake/issues/18614 diff --git a/test_install/test_install.cpp b/test_install/test_install.cpp index 5205743cd7c..1479f74fcb8 100644 --- a/test_install/test_install.cpp +++ b/test_install/test_install.cpp @@ -248,6 +248,7 @@ int main(int, char **) { using Mtx = gko::matrix::SparsityCsr<>; auto test = Mtx::create(refExec, gko::dim<2>{2, 2}); + } // core/preconditioner/ilu.hpp { diff --git a/third_party/git-cmake-format/CMakeLists.txt b/third_party/git-cmake-format/CMakeLists.txt index a733de04a8f..c05253a738c 100644 --- a/third_party/git-cmake-format/CMakeLists.txt +++ b/third_party/git-cmake-format/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_load_git_package(git-cmake-format - "https://github.com/gflegar/git-cmake-format.git" - "9fdc1553c525b3d7ce758892fe666078903a1b21") + "https://github.com/ginkgo-project/git-cmake-format.git" + "e19ab13e640d58abd3bfdbff5f77b499b2ec4169") add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/build EXCLUDE_FROM_ALL)