[CI] Add RMM as an optional dependency
hcho3 committed Jul 8, 2020
1 parent c011a63 commit 601d21b
Showing 6 changed files with 147 additions and 6 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -44,6 +44,7 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
option(USE_RMM "Build with RAPIDS Memory Manager (RMM)" OFF)
## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF)
@@ -79,6 +80,9 @@ endif (R_LIB AND GOOGLE_TEST)
if (USE_AVX)
message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.")
endif (USE_AVX)
if (USE_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_RMM` must be enabled with `USE_CUDA` flag.")
endif (USE_RMM AND NOT (USE_CUDA))

#-- Sanitizer
if (USE_SANITIZER)
@@ -170,6 +174,9 @@ endif (R_LIB)
# Plugin
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)

# 3rd-party libs
include(cmake/ExternalLibs.cmake)

#-- library
if (BUILD_STATIC_LIB)
add_library(xgboost STATIC)
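
The new option composes with the existing CUDA flag; configuring with USE_RMM but without USE_CUDA stops at configure time with the SEND_ERROR guard above. A minimal local configure-and-build sketch, assuming an activated Conda environment that provides RMM (picked up by cmake/ExternalLibs.cmake, added below):

    # Illustrative local build; the flags mirror the CI invocation in the Jenkinsfile.
    mkdir -p build && cd build
    cmake .. -DUSE_CUDA=ON -DUSE_RMM=ON
    make -j$(nproc)
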
34 changes: 34 additions & 0 deletions Jenkinsfile
@@ -66,6 +66,7 @@ pipeline {
'build-cpu-non-omp': { BuildCPUNonOmp() },
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-gpu-rmm-cuda10.2': { BuildCUDAWithRMM(cuda_version: '10.2') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
'build-jvm-doc': { BuildJVMDoc() }
])
@@ -84,6 +85,7 @@ pipeline {
'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
'test-rmm-cpp-gpu': { TestCppGPUWithRMM(cuda_version: '10.2') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '2.4.3') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -262,6 +264,22 @@ def BuildCUDA(args) {
}
}

def BuildCUDAWithRMM(args) {
node('linux && cpu_build') {
unstash name: 'srcs'
echo "Build with CUDA ${args.cuda_version} and RMM"
def container_type = "rmm"
def docker_binary = "docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON
"""
echo 'Stashing C++ test executable (testxgboost)...'
stash name: 'xgboost_rmm_cpp_tests', includes: 'build/testxgboost'
deleteDir()
}
}

def BuildJVMPackages(args) {
node('linux && cpu') {
unstash name: 'srcs'
@@ -368,6 +386,22 @@ def TestCppGPU(args) {
}
}

def TestCppGPUWithRMM(args) {
node('linux && gpu') {
unstash name: 'xgboost_rmm_cpp_tests'
unstash name: 'srcs'
echo "Test C++, CUDA ${args.cuda_version} with RMM"
def container_type = "rmm"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
echo "Using a single GPU"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*"
"""
deleteDir()
}
}

def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
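
For local debugging, the two new stages reduce to one build command and one test command inside the rmm container. A rough sketch, assuming an image built from tests/ci_build/Dockerfile.rmm (added below) and tagged xgboost-ci-rmm:cuda10.2 (a hypothetical tag; the CI derives its own through the ${dockerRun} helper), with the repository mounted at the image's /workspace and the entrypoint forwarding its arguments as a command, as the CI invocations above rely on:

    # Equivalent of BuildCUDAWithRMM: configure and compile with RMM inside the rmm_test Conda env.
    docker run --rm -v "$PWD":/workspace xgboost-ci-rmm:cuda10.2 \
        tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON

    # Equivalent of TestCppGPUWithRMM: run the single-GPU C++ tests on a GPU host.
    nvidia-docker run --rm -v "$PWD":/workspace xgboost-ci-rmm:cuda10.2 \
        bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*"
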
27 changes: 27 additions & 0 deletions cmake/ExternalLibs.cmake
@@ -0,0 +1,27 @@
# RMM
if (USE_RMM)
# Use Conda env if available
if(DEFINED ENV{CONDA_PREFIX})
set(CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX};${CMAKE_PREFIX_PATH}")
message(STATUS "Detected Conda environment, CMAKE_PREFIX_PATH set to: ${CMAKE_PREFIX_PATH}")
else()
message(STATUS "No Conda environment detected")
endif()

find_path(RMM_INCLUDE "rmm"
HINTS "$ENV{RMM_ROOT}/include")

find_library(RMM_LIBRARY "rmm"
HINTS "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build")

if ((NOT RMM_LIBRARY) OR (NOT RMM_INCLUDE))
message(FATAL_ERROR "Could not locate RMM library")
endif ()

message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE})
target_link_libraries(objxgboost PUBLIC ${RMM_LIBRARY} cuda)
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
endif ()
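
When RMM does not come from the active Conda environment, the find_path/find_library calls above fall back to the RMM_ROOT hints, i.e. headers under $RMM_ROOT/include and librmm under $RMM_ROOT/lib or $RMM_ROOT/build. A short sketch with an illustrative install prefix:

    # Point the build at an out-of-Conda RMM installation (the path is hypothetical).
    export RMM_ROOT=/opt/rmm
    cmake .. -DUSE_CUDA=ON -DUSE_RMM=ON
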
33 changes: 28 additions & 5 deletions src/common/device_helpers.cuh
@@ -36,7 +36,15 @@

#ifdef XGBOOST_USE_NCCL
#include "nccl.h"
#endif
#endif // XGBOOST_USE_NCCL

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#include "rmm/mr/device/cuda_memory_resource.hpp"
#include "rmm/mr/device/default_memory_resource.hpp"
#include "rmm/mr/device/device_memory_resource.hpp"
#include "rmm/mr/device/pool_memory_resource.hpp"
#include "rmm/mr/device/thrust_allocator_adaptor.hpp"
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)

@@ -370,12 +378,21 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
}

namespace detail {

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator<T>;
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = thrust::device_malloc_allocator<T>;
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

/**
* \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
*/
template <class T>
struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
using SuperT = thrust::device_malloc_allocator<T>;
struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
using SuperT = XGBBaseDeviceAllocator<T>;
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind // NOLINT
@@ -391,13 +408,19 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
return SuperT::deallocate(ptr, n);
}
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
using cuda_mr = rmm::mr::cuda_memory_resource;
using pool_mr = rmm::mr::pool_memory_resource<cuda_mr>;
XGBDefaultDeviceAllocatorImpl() : SuperT(new pool_mr(new cuda_mr), cudaStream_t{0}) {}
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
};

/**
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs allocations if verbose. Does not initialise memory on construction.
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
* allocations if verbose. Does not initialise memory on construction.
*/
template <class T>
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind // NOLINT
39 changes: 39 additions & 0 deletions tests/ci_build/Dockerfile.rmm
@@ -0,0 +1,39 @@
ARG CUDA_VERSION
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04

# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell

# Install all basic requirements
RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
# Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr

ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with RMM
RUN \
conda create -n rmm_test -c nvidia -c rapidsai -c conda-forge -c defaults \
python=3.7 rmm=0.14 cudatoolkit=$CUDA_VERSION

ENV GOSU_VERSION 1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
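
Building the image locally only needs the CUDA_VERSION build-arg that the Jenkinsfile passes; the tag below is a hypothetical stand-in, and the build context is assumed to be tests/ci_build so that entrypoint.sh is available to COPY:

    # Build the RMM CI image and smoke-check that the rmm_test environment resolved RMM 0.14.
    docker build -t xgboost-ci-rmm:cuda10.2 --build-arg CUDA_VERSION=10.2 \
        -f tests/ci_build/Dockerfile.rmm tests/ci_build
    docker run --rm --entrypoint "" xgboost-ci-rmm:cuda10.2 conda list -n rmm_test rmm
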
13 changes: 12 additions & 1 deletion tests/ci_build/build_via_cmake.sh
@@ -1,10 +1,21 @@
#!/usr/bin/env bash
set -e

if [[ "$1" == --conda-env=* ]]
then
conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -)
echo "Activating Conda environment ${conda_env}"
shift 1
cmake_args="$@"
source activate ${conda_env}
else
cmake_args="$@"
fi

rm -rf build
mkdir build
cd build
cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
make clean
make -j$(nproc)
cd ..
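
The --conda-env flag must be the first argument; everything after it is forwarded to CMake unchanged, as before. Example invocations from the repository root, mirroring the Jenkinsfile call above:

    # Plain build: all arguments go straight to CMake.
    tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON

    # RMM build: activate the rmm_test Conda environment first, then forward the remaining flags.
    tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON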
