Skip to content

Commit

Permalink
A couple of fixes found with nvcc build (#78)
Browse files Browse the repository at this point in the history
* Fixed some errors and warnings with hipcc(nvcc backend) build
Co-authored-by: Matt Belhorn and Eiden Yoshida
  • Loading branch information
feizheng10 authored Jan 7, 2020
1 parent bdb82c6 commit 828de1a
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 41 deletions.
17 changes: 12 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,29 @@ if( USE_HIP_CLANG )
elseif( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" )
# For now, we assume hipcc compiler means to compile for CUDA backend
message( STATUS "HIPCC compiler detected; CUDA backend selected" )


set( ENV{HIP_PLATFORM} nvcc )
set( HIP_PLATFORM "nvcc" )
set( CMAKE_C_COMPILE_OPTIONS_PIC "-Xcompiler ${CMAKE_C_COMPILE_OPTIONS_PIC}" )
set( CMAKE_CXX_COMPILE_OPTIONS_PIC "-Xcompiler ${CMAKE_CXX_COMPILE_OPTIONS_PIC}" )
set( CMAKE_SHARED_LIBRARY_C_FLAGS "-Xlinker ${CMAKE_SHARED_LIBRARY_C_FLAGS}" )
set( CMAKE_SHARED_LIBRARY_CXX_FLAGS "-Xlinker ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}" )
set( CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-Xlinker -soname," )
set( CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG "-Xlinker -soname," )

set( CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG "-Xlinker -rpath," )
set( CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG "-Xlinker -rpath," )
set( CMAKE_EXECUTABLE_RUNTIME_C_FLAG "-Xlinker -rpath," )
set( CMAKE_EXECUTABLE_RUNTIME_CXX_FLAG "-Xlinker -rpath," )
set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY "-Xcompiler ${CMAKE_C_COMPILE_OPTIONS_VISIBILITY}" )
set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY "-Xcompiler ${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY}" )
set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler ${CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}" )
set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler ${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}" )

#TODO:
# We should be able to set library visibility in library/src/CMakeLists.txt only.
# Need to investigate the below still required or not.
#set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY "-Xcompiler='${CMAKE_C_COMPILE_OPTIONS_VISIBILITY}'" )
#set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY "-Xcompiler='${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY}'" )
#set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler='${CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}'" )
#set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler='${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}'" )

elseif( CMAKE_CXX_COMPILER MATCHES ".*/hcc$" )
message( STATUS "HCC compiler set; ROCm backend selected" )
endif( )
Expand Down
2 changes: 1 addition & 1 deletion clients/tests/accuracy_test_1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ void normal_1D_real_to_complex_interleaved(size_t N,
fftw_vector<std::complex<Tfloat>> gpu_out_comp(osize);
hip_status = hipMemcpy(
gpu_out_comp.data(), gpu_out, osize * sizeof(std::complex<Tfloat>), hipMemcpyDeviceToHost);
ASSERT_TRUE(hip_status == hipSuccess) << "hipMemcpy failure";
ASSERT_TRUE(hip_status == hipSuccess) << "hipMemcpy failure " << hipGetErrorString(hip_status);

if(verbose > 1)
{
Expand Down
8 changes: 7 additions & 1 deletion library/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,13 @@ set_target_properties( rocfft PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BIN
set_target_properties( rocfft PROPERTIES DEBUG_POSTFIX "-d" )
set_target_properties( rocfft PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON )

set_target_properties( rocfft PROPERTIES CXX_VISIBILITY_PRESET "hidden" VISIBILITY_INLINES_HIDDEN ON )
#TODO:
# hipcc(with nvcc backend) build has problem for share library visibility,
# need to figure out the reason and enable visibility "hidden" for nvcc eventually.
if(NOT HIP_PLATFORM STREQUAL "nvcc")
set_target_properties( rocfft PROPERTIES CXX_VISIBILITY_PRESET "hidden" VISIBILITY_INLINES_HIDDEN ON )
endif()

generate_export_header( rocfft EXPORT_FILE_NAME ${PROJECT_BINARY_DIR}/include/rocfft-export.h )

# Following Boost conventions of prefixing 'lib' on static built libraries, across all platforms
Expand Down
7 changes: 5 additions & 2 deletions library/src/device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,10 @@ add_library( rocfft-device
)
add_library( roc::rocfft-device ALIAS rocfft-device )
target_compile_features( rocfft-device PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type )
target_compile_options (rocfft-device PRIVATE -fno-gpu-rdc)

if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$" OR HIP_PLATFORM STREQUAL "hip-clang")
target_compile_options (rocfft-device PRIVATE -fno-gpu-rdc)
endif()

# Remove this check when we no longer build with older rocm stack(ie < 1.8.2)
if(TARGET hip::device)
Expand All @@ -94,7 +97,7 @@ target_link_libraries( rocfft-device PRIVATE hip::hip_hcc hip::hip_device hcc::h
endif()

if(HIP_PLATFORM STREQUAL "nvcc")
target_compile_options( rocfft-device PRIVATE "-gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_30,code=sm_30" )
target_compile_options( rocfft-device PRIVATE "-gencode arch=compute_75,code=sm_75 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_60,code=sm_60" )
endif()

if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$" OR HIP_PLATFORM STREQUAL "hip-clang")
Expand Down
3 changes: 2 additions & 1 deletion library/src/device/complex2real.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ __global__ void hermitian2complex_kernel(size_t hermitian_size,
{
T res = input[0];
outputs[0] = res;
outputc[0] = T(res.x, -res.y);
res.y = -res.y;
outputc[0] = res;
}
}

Expand Down
38 changes: 15 additions & 23 deletions library/src/device/real2complex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,21 +326,17 @@ __global__ void real_post_process_kernel(const size_t half_N,
{
const Tcomplex p = input[idx_p];
const Tcomplex q = input[idx_q];

const Tcomplex u(0.5 * (p.x + q.x), 0.5 * (p.y - q.y)); // 0.5*(p + conj(q))
const Tcomplex v(0.5 * (p.x - q.x), 0.5 * (p.y + q.y)); // 0.5*(p - conj(q))
const Tcomplex u = 0.5 * (p + q);
const Tcomplex v = 0.5 * (p - q);

const Tcomplex twd_p = twiddles[idx_p];
output[idx_p].x = u.x + v.x * twd_p.y + v.y * twd_p.x;
output[idx_p].y = u.y + v.y * twd_p.y - v.x * twd_p.x;
// NB: twd_q = -conj(twd_p) = (-twd_p.x, twd_p.y);

// NB: twd_q = -conj(twd_p) = (-twd_p.x, twd_p.y)
// Tcomplex twd_q = twiddles[idx_q];
// output[idx_q].x = u.x - v.x * twd_q.y + v.y * twd_q.x;
// output[idx_q].y = -u.y + v.y * twd_q.y + v.x * twd_q.x;
output[idx_p].x = u.x + v.x * twd_p.y + u.y * twd_p.x;
output[idx_p].y = v.y + u.y * twd_p.y - v.x * twd_p.x;

output[idx_q].x = u.x - v.x * twd_p.y - v.y * twd_p.x;
output[idx_q].y = -u.y + v.y * twd_p.y - v.x * twd_p.x;
output[idx_q].x = u.x - v.x * twd_p.y - u.y * twd_p.x;
output[idx_q].y = -v.y + u.y * twd_p.y - v.x * twd_p.x;
}
}
}
Expand Down Expand Up @@ -394,21 +390,17 @@ __global__ void real_pre_process_kernel(const size_t half_N,
const Tcomplex p = input[idx_p];
const Tcomplex q = input[idx_q];

const Tcomplex u(p.x + q.x, p.y - q.y); // p + conj(q)
const Tcomplex v(p.x - q.x, p.y + q.y); // p - conj(q)

const Tcomplex twd_p(-twiddles[idx_p].x, twiddles[idx_p].y);
// NB: twd_q = -conj(twd_p)
const Tcomplex u = p + q;
const Tcomplex v = p - q;

output[idx_p].x = u.x + v.x * twd_p.y + v.y * twd_p.x;
output[idx_p].y = u.y + v.y * twd_p.y - v.x * twd_p.x;
const Tcomplex twd_p = twiddles[idx_p];
// NB: twd_q = -conj(twd_p);

output[idx_q].x = u.x - v.x * twd_p.y - v.y * twd_p.x;
output[idx_q].y = -u.y + v.y * twd_p.y - v.x * twd_p.x;
output[idx_p].x = u.x + v.x * twd_p.y - u.y * twd_p.x;
output[idx_p].y = v.y + u.y * twd_p.y + v.x * twd_p.x;

// const T twd_q(-twiddles[idx_q].x, twiddles[idx_q].y);
// output[idx_q].x = u.x - v.x * twd_q.y + v.y * twd_q.x;
// output[idx_q].y = -u.y + v.y * twd_q.y + v.x * twd_q.x;
output[idx_q].x = u.x - v.x * twd_p.y + u.y * twd_p.x;
output[idx_q].y = -v.y + u.y * twd_p.y + v.x * twd_p.x;
}
}
}
Expand Down
10 changes: 2 additions & 8 deletions library/src/hipfft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ hipfftResult hipfftMakePlan_internal(hipfftHandle plan,
if(plan->autoAllocate)
{
if(plan->workBuffer)
if(hipFree(plan->workBuffer) != HIP_SUCCESS)
if(hipFree(plan->workBuffer) != hipSuccess)
return HIPFFT_ALLOC_FAILED;
if(hipMalloc(&plan->workBuffer, workBufferSize) != HIP_SUCCESS)
if(hipMalloc(&plan->workBuffer, workBufferSize) != hipSuccess)
return HIPFFT_ALLOC_FAILED;
}
ROC_FFT_CHECK_INVALID_VALUE(
Expand Down Expand Up @@ -496,8 +496,6 @@ hipfftResult hipfftMakePlanMany(hipfftHandle plan,

size_t number_of_transforms = batch;

size_t workBufferSize = 0;

rocfft_plan_description desc = nullptr;
if((inembed != nullptr) || (onembed != nullptr))
{
Expand Down Expand Up @@ -564,10 +562,6 @@ hipfftResult hipfftMakePlanMany(hipfftHandle plan,
in_array_type = rocfft_array_type_complex_interleaved;
out_array_type = rocfft_array_type_complex_interleaved;
break;
defaut:
in_array_type = rocfft_array_type_complex_interleaved;
out_array_type = rocfft_array_type_complex_interleaved;
break;
}

ROC_FFT_CHECK_INVALID_VALUE(rocfft_plan_description_set_data_layout(desc,
Expand Down
2 changes: 2 additions & 0 deletions library/src/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ rocfft_status rocfft_execute(const rocfft_plan plan,

if(execPlan.workBufSize > 0)
{
#ifndef __NVCC__
assert(info != nullptr);
#endif
assert(info->workBufferSize >= (execPlan.workBufSize * 2 * plan->base_type_size));
}

Expand Down

1 comment on commit 828de1a

@d-croft
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That code style is better than something I saw in the Linux kernel.

Please sign in to comment.