diff --git a/common/cuda_hip/factorization/par_ic_kernels.hpp.inc b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc index 569c2d23157..57b8189123a 100644 --- a/common/cuda_hip/factorization/par_ic_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc @@ -44,6 +44,9 @@ __global__ __launch_bounds__(default_block_size) void ic_init( } auto l_nz = l_row_ptrs[row + 1] - 1; auto diag = sqrt(l_vals[l_nz]); + printf("%d %lf %lf \n", static_cast(row), + static_cast(real(l_vals[l_nz])), + static_cast(real(diag))); if (is_finite(diag)) { l_vals[l_nz] = diag; } else { diff --git a/core/base/extended_float.hpp b/core/base/extended_float.hpp index bf79c75b519..6898c4ffaa2 100644 --- a/core/base/extended_float.hpp +++ b/core/base/extended_float.hpp @@ -54,6 +54,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#else +class __half; #endif // __CUDA_ARCH__ @@ -101,7 +103,7 @@ struct basic_float_traits { static constexpr bool rounds_to_nearest = true; }; -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +// #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) template <> struct basic_float_traits<__half> { using type = __half; @@ -110,7 +112,7 @@ struct basic_float_traits<__half> { static constexpr int exponent_bits = 5; static constexpr bool rounds_to_nearest = true; }; -#endif +// #endif template <> struct basic_float_traits { @@ -598,17 +600,17 @@ class complex { value_type imag() const noexcept { return imag_; } - operator std::complex() const noexcept + operator std::complex() const noexcept { - return std::complex(static_cast(real_), - static_cast(imag_)); + return std::complex(static_cast(real_), + static_cast(imag_)); } - operator std::complex() const noexcept - { - return std::complex(static_cast(real_), - static_cast(imag_)); - } + // operator std::complex() const noexcept + // { + // return std::complex(static_cast(real_), + // static_cast(imag_)); + // } template complex& operator=(const V& val) diff --git a/core/preconditioner/jacobi.cpp b/core/preconditioner/jacobi.cpp index 5f8d194fcd6..75f5e941303 100644 --- a/core/preconditioner/jacobi.cpp +++ b/core/preconditioner/jacobi.cpp @@ -319,7 +319,9 @@ void Jacobi::generate(const LinOp* system_matrix, ->extract_diagonal_linop()); auto diag_vt = ::gko::detail::temporary_conversion>:: - template create>>( + template create>, + matrix::Diagonal>>>( diag.get()); if (!diag_vt) { GKO_NOT_SUPPORTED(system_matrix); diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp index df867993551..4f40b81ad47 100644 --- a/hip/base/types.hip.hpp +++ b/hip/base/types.hip.hpp @@ -122,9 +122,9 @@ __device__ __forceinline__ __half abs(const __half& val) { return __habs(val); } #endif -#if defined(__HIPCC__) +// #if defined(__HIPCC__) __device__ __forceinline__ float sqrt(float val) { return sqrtf(val); } -__device__ __forceinline__ double sqrt(double val) { return sqrt(val); } +// __device__ __forceinline__ double sqrt(double val) { return sqrt(val); } __device__ __forceinline__ thrust::complex sqrt( thrust::complex val) { @@ -144,7 +144,7 @@ __device__ __forceinline__ __half sqrt(__half val) #else __device__ __forceinline__ __half sqrt(__half val) { return hsqrt(val); } #endif -#endif +// #endif namespace kernels { diff --git a/hip/preconditioner/jacobi_generate_instantiate.inc.hip.cpp b/hip/preconditioner/jacobi_generate_instantiate.inc.hip.cpp index cb37fd3d0f2..6589033fb56 100644 --- a/hip/preconditioner/jacobi_generate_instantiate.inc.hip.cpp +++ b/hip/preconditioner/jacobi_generate_instantiate.inc.hip.cpp @@ -53,18 +53,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -namespace detail { -#if !defined(__HIP_DEVICE_COMPILE__) -template <> -struct basic_float_traits<__half> { - using type = __half; - static constexpr int sign_bits = 1; - static constexpr int significand_bits = 10; - static constexpr int exponent_bits = 5; - static constexpr bool rounds_to_nearest = true; -}; -#endif -} // namespace detail +// namespace detail { +// #if !defined(__HIP_DEVICE_COMPILE__) +// template <> +// struct basic_float_traits<__half> { +// using type = __half; +// static constexpr int sign_bits = 1; +// static constexpr int significand_bits = 10; +// static constexpr int exponent_bits = 5; +// static constexpr bool rounds_to_nearest = true; +// }; +// #endif +// } // namespace detail namespace kernels { namespace hip { /** diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp new file mode 100644 index 00000000000..7ef38f68247 --- /dev/null +++ b/include/ginkgo/core/base/half.hpp @@ -0,0 +1,24 @@ +#ifndef GKO_BASE_HALF_HPP_ +#define GKO_BASE_HALF_HPP_ +#include +#include + + +#ifdef __CUDA_ARCH__ + + +#include + + +#elif defined(__HIP_DEVICE_COMPILE__) + + +#include + + +#endif // __CUDA_ARCH__ + + +namespace gko {} + +#endif // GKO_BASE_HALF_HPP_ diff --git a/reference/matrix/ell_kernels.cpp b/reference/matrix/ell_kernels.cpp index 1b2a1b4b243..802ab25699c 100644 --- a/reference/matrix/ell_kernels.cpp +++ b/reference/matrix/ell_kernels.cpp @@ -137,7 +137,8 @@ void advanced_spmv(std::shared_ptr exec, for (size_type j = 0; j < c->get_size()[1]; j++) { for (size_type row = 0; row < a->get_size()[0]; row++) { - arithmetic_type result = c->at(row, j); + arithmetic_type result = + static_cast(c->at(row, j)); result *= beta_val; for (size_type i = 0; i < num_stored_elements_per_row; i++) { auto val = a_vals(row + i * stride);