diff --git a/CMakeLists.txt b/CMakeLists.txt index d8190c5fb809..830f2669ec21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF) option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF) set(GPU_COMPUTE_VER "" CACHE STRING "Semicolon separated list of compute versions to be built against, e.g. '35;61'") +option(USE_RMM "Build with RAPIDS Memory Manager (RMM)" OFF) ## Copied From dmlc option(USE_HDFS "Build with HDFS support" OFF) option(USE_AZURE "Build with AZURE support" OFF) @@ -79,6 +80,9 @@ endif (R_LIB AND GOOGLE_TEST) if (USE_AVX) message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.") endif (USE_AVX) +if (USE_RMM AND NOT (USE_CUDA)) + message(SEND_ERROR "`USE_RMM` must be enabled with `USE_CUDA` flag.") +endif (USE_RMM AND NOT (USE_CUDA)) #-- Sanitizer if (USE_SANITIZER) @@ -170,6 +174,9 @@ endif (R_LIB) # Plugin add_subdirectory(${xgboost_SOURCE_DIR}/plugin) +# 3rd-party libs +include(cmake/ExternalLibs.cmake) + #-- library if (BUILD_STATIC_LIB) add_library(xgboost STATIC) diff --git a/Jenkinsfile b/Jenkinsfile index 40db6f9c0fe9..2c9c40b4480e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -66,6 +66,7 @@ pipeline { 'build-cpu-non-omp': { BuildCPUNonOmp() }, 'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') }, 'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') }, + 'build-gpu-rmm-cuda10.2': { BuildCUDAWithRMM(cuda_version: '10.2') }, 'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') }, 'build-jvm-doc': { BuildJVMDoc() } ]) @@ -84,6 +85,7 @@ pipeline { 'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) }, 'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') }, 'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) }, + 'test-rmm-cpp-gpu': { TestCppGPUWithRMM(cuda_version: '10.2') }, 'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '2.4.3') }, 'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') }, 'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') }, @@ -262,6 +264,22 @@ def BuildCUDA(args) { } } +def BuildCUDAWithRMM(args) { + node('linux && cpu_build') { + unstash name: 'srcs' + echo "Build with CUDA ${args.cuda_version} and RMM" + def container_type = "rmm" + def docker_binary = "docker" + def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}" + sh """ + ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON + """ + echo 'Stashing C++ test executable (testxgboost)...' + stash name: 'xgboost_rmm_cpp_tests', includes: 'build/testxgboost' + deleteDir() + } +} + def BuildJVMPackages(args) { node('linux && cpu') { unstash name: 'srcs' @@ -368,6 +386,22 @@ def TestCppGPU(args) { } } +def TestCppGPUWithRMM(args) { + node('linux && gpu') { + unstash name: 'xgboost_rmm_cpp_tests' + unstash name: 'srcs' + echo "Test C++, CUDA ${args.cuda_version} with RMM" + def container_type = "rmm" + def docker_binary = "nvidia-docker" + def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}" + echo "Using a single GPU" + sh """ + ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*" + """ + deleteDir() + } +} + def CrossTestJVMwithJDK(args) { node('linux && cpu') { unstash name: 'xgboost4j_jar' diff --git a/cmake/ExternalLibs.cmake b/cmake/ExternalLibs.cmake new file mode 100644 index 000000000000..7181699d508f --- /dev/null +++ b/cmake/ExternalLibs.cmake @@ -0,0 +1,27 @@ +# RMM +if (USE_RMM) + # Use Conda env if available + if(DEFINED ENV{CONDA_PREFIX}) + set(CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX};${CMAKE_PREFIX_PATH}") + message(STATUS "Detected Conda environment, CMAKE_PREFIX_PATH set to: ${CMAKE_PREFIX_PATH}") + else() + message(STATUS "No Conda environment detected") + endif() + + find_path(RMM_INCLUDE "rmm" + HINTS "$ENV{RMM_ROOT}/include") + + find_library(RMM_LIBRARY "rmm" + HINTS "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build") + + if ((NOT RMM_LIBRARY) OR (NOT RMM_INCLUDE)) + message(FATAL_ERROR "Could not locate RMM library") + endif () + + message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}") + message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") + + target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE}) + target_link_libraries(objxgboost PUBLIC ${RMM_LIBRARY} cuda) + target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1) +endif () diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index d339dc72d712..c948ce130450 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -36,7 +36,15 @@ #ifdef XGBOOST_USE_NCCL #include "nccl.h" -#endif +#endif // XGBOOST_USE_NCCL + +#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 +#include "rmm/mr/device/cuda_memory_resource.hpp" +#include "rmm/mr/device/default_memory_resource.hpp" +#include "rmm/mr/device/device_memory_resource.hpp" +#include "rmm/mr/device/pool_memory_resource.hpp" +#include "rmm/mr/device/thrust_allocator_adaptor.hpp" +#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) @@ -370,12 +378,21 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) { } namespace detail { + +#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 +template +using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator; +#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 +template +using XGBBaseDeviceAllocator = thrust::device_malloc_allocator; +#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 + /** * \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose. */ template -struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator { - using SuperT = thrust::device_malloc_allocator; +struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator { + using SuperT = XGBBaseDeviceAllocator; using pointer = thrust::device_ptr; // NOLINT template struct rebind // NOLINT @@ -391,13 +408,19 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator { GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T)); return SuperT::deallocate(ptr, n); } +#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 + using cuda_mr = rmm::mr::cuda_memory_resource; + using pool_mr = rmm::mr::pool_memory_resource; + XGBDefaultDeviceAllocatorImpl() : SuperT(new pool_mr(new cuda_mr), cudaStream_t{0}) {} +#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 }; /** - * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs allocations if verbose. Does not initialise memory on construction. + * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs + * allocations if verbose. Does not initialise memory on construction. */ template -struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator { +struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator { using pointer = thrust::device_ptr; // NOLINT template struct rebind // NOLINT diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm new file mode 100644 index 000000000000..5f0d44f38e11 --- /dev/null +++ b/tests/ci_build/Dockerfile.rmm @@ -0,0 +1,39 @@ +ARG CUDA_VERSION +FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04 + +# Environment +ENV DEBIAN_FRONTEND noninteractive +SHELL ["/bin/bash", "-c"] # Use Bash as shell + +# Install all basic requirements +RUN \ + apt-get update && \ + apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \ + # Python + wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash Miniconda3.sh -b -p /opt/python && \ + # CMake + wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr + +ENV PATH=/opt/python/bin:$PATH + +# Create new Conda environment with RMM +RUN \ + conda create -n rmm_test -c nvidia -c rapidsai -c conda-forge -c defaults \ + python=3.7 rmm=0.14 cudatoolkit=$CUDA_VERSION + +ENV GOSU_VERSION 1.10 + +# Install lightweight sudo (not bound to TTY) +RUN set -ex; \ + wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/build_via_cmake.sh b/tests/ci_build/build_via_cmake.sh index 98808141b0e8..8fc0c0acc66f 100755 --- a/tests/ci_build/build_via_cmake.sh +++ b/tests/ci_build/build_via_cmake.sh @@ -1,10 +1,21 @@ #!/usr/bin/env bash set -e +if [[ "$1" == --conda-env=* ]] +then + conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -) + echo "Activating Conda environment ${conda_env}" + shift 1 + cmake_args="$@" + source activate ${conda_env} +else + cmake_args="$@" +fi + rm -rf build mkdir build cd build -cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON +cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON make clean make -j$(nproc) cd ..