[CI] Add RMM as an optional dependency
hcho3 committed Jul 8, 2020
1 parent c011a63 commit 601d21b
Showing 6 changed files with 147 additions and 6 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -44,6 +44,7 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
option(USE_RMM "Build with RAPIDS Memory Manager (RMM)" OFF)
## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF)
@@ -79,6 +80,9 @@ endif (R_LIB AND GOOGLE_TEST)
if (USE_AVX)
message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.")
endif (USE_AVX)
if (USE_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_RMM` must be enabled with `USE_CUDA` flag.")
endif (USE_RMM AND NOT (USE_CUDA))

#-- Sanitizer
if (USE_SANITIZER)
@@ -170,6 +174,9 @@ endif (R_LIB)
# Plugin
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)

# 3rd-party libs
include(cmake/ExternalLibs.cmake)

#-- library
if (BUILD_STATIC_LIB)
add_library(xgboost STATIC)
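
The new option composes with the existing CUDA flag; configuring with USE_RMM but without USE_CUDA stops at configure time with the SEND_ERROR guard above. A minimal local configure-and-build sketch, assuming an activated Conda environment that provides RMM (picked up by cmake/ExternalLibs.cmake, added below):

    # Illustrative local build; the flags mirror the CI invocation in the Jenkinsfile.
    mkdir -p build && cd build
    cmake .. -DUSE_CUDA=ON -DUSE_RMM=ON
    make -j$(nproc)
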
34 changes: 34 additions & 0 deletions Jenkinsfile
@@ -66,6 +66,7 @@ pipeline {
'build-cpu-non-omp': { BuildCPUNonOmp() },
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-gpu-rmm-cuda10.2': { BuildCUDAWithRMM(cuda_version: '10.2') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
'build-jvm-doc': { BuildJVMDoc() }
])
@@ -84,6 +85,7 @@ pipeline {
'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
'test-rmm-cpp-gpu': { TestCppGPUWithRMM(cuda_version: '10.2') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '2.4.3') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -262,6 +264,22 @@ def BuildCUDA(args) {
}
}

def BuildCUDAWithRMM(args) {
node('linux && cpu_build') {
unstash name: 'srcs'
echo "Build with CUDA ${args.cuda_version} and RMM"
def container_type = "rmm"
def docker_binary = "docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON
"""
echo 'Stashing C++ test executable (testxgboost)...'
stash name: 'xgboost_rmm_cpp_tests', includes: 'build/testxgboost'
deleteDir()
}
}

def BuildJVMPackages(args) {
node('linux && cpu') {
unstash name: 'srcs'
@@ -368,6 +386,22 @@ def TestCppGPU(args) {
}
}

def TestCppGPUWithRMM(args) {
node('linux && gpu') {
unstash name: 'xgboost_rmm_cpp_tests'
unstash name: 'srcs'
echo "Test C++, CUDA ${args.cuda_version} with RMM"
def container_type = "rmm"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
echo "Using a single GPU"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*"
"""
deleteDir()
}
}

def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
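
For local debugging, the two new stages reduce to one build command and one test command inside the rmm container. A rough sketch, assuming an image built from tests/ci_build/Dockerfile.rmm (added below) and tagged xgboost-ci-rmm:cuda10.2 (a hypothetical tag; the CI derives its own through the ${dockerRun} helper), with the repository mounted at the image's /workspace and the entrypoint forwarding its arguments as a command, as the CI invocations above rely on:

    # Equivalent of BuildCUDAWithRMM: configure and compile with RMM inside the rmm_test Conda env.
    docker run --rm -v "$PWD":/workspace xgboost-ci-rmm:cuda10.2 \
        tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON

    # Equivalent of TestCppGPUWithRMM: run the single-GPU C++ tests on a GPU host.
    nvidia-docker run --rm -v "$PWD":/workspace xgboost-ci-rmm:cuda10.2 \
        bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*"
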
27 changes: 27 additions & 0 deletions cmake/ExternalLibs.cmake
@@ -0,0 +1,27 @@
# RMM
if (USE_RMM)
# Use Conda env if available
if(DEFINED ENV{CONDA_PREFIX})
set(CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX};${CMAKE_PREFIX_PATH}")
message(STATUS "Detected Conda environment, CMAKE_PREFIX_PATH set to: ${CMAKE_PREFIX_PATH}")
else()
message(STATUS "No Conda environment detected")
endif()

find_path(RMM_INCLUDE "rmm"
HINTS "$ENV{RMM_ROOT}/include")

find_library(RMM_LIBRARY "rmm"
HINTS "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build")

if ((NOT RMM_LIBRARY) OR (NOT RMM_INCLUDE))
message(FATAL_ERROR "Could not locate RMM library")
endif ()

message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE})
target_link_libraries(objxgboost PUBLIC ${RMM_LIBRARY} cuda)
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
endif ()
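
When RMM does not come from the active Conda environment, the find_path/find_library calls above fall back to the RMM_ROOT hints, i.e. headers under $RMM_ROOT/include and librmm under $RMM_ROOT/lib or $RMM_ROOT/build. A short sketch with an illustrative install prefix:

    # Point the build at an out-of-Conda RMM installation (the path is hypothetical).
    export RMM_ROOT=/opt/rmm
    cmake .. -DUSE_CUDA=ON -DUSE_RMM=ON
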
33 changes: 28 additions & 5 deletions src/common/device_helpers.cuh
@@ -36,7 +36,15 @@

#ifdef XGBOOST_USE_NCCL
#include "nccl.h"
#endif
#endif // XGBOOST_USE_NCCL

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#include "rmm/mr/device/cuda_memory_resource.hpp"
#include "rmm/mr/device/default_memory_resource.hpp"
#include "rmm/mr/device/device_memory_resource.hpp"
#include "rmm/mr/device/pool_memory_resource.hpp"
#include "rmm/mr/device/thrust_allocator_adaptor.hpp"
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)

@@ -370,12 +378,21 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
}

namespace detail {

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator<T>;
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = thrust::device_malloc_allocator<T>;
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

/**
* \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
*/
template <class T>
struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
using SuperT = thrust::device_malloc_allocator<T>;
struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
using SuperT = XGBBaseDeviceAllocator<T>;
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind // NOLINT
@@ -391,13 +408,19 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
return SuperT::deallocate(ptr, n);
}
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
using cuda_mr = rmm::mr::cuda_memory_resource;
using pool_mr = rmm::mr::pool_memory_resource<cuda_mr>;
XGBDefaultDeviceAllocatorImpl() : SuperT(new pool_mr(new cuda_mr), cudaStream_t{0}) {}
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
};

/**
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs allocations if verbose. Does not initialise memory on construction.
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
* allocations if verbose. Does not initialise memory on construction.
*/
template <class T>
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind // NOLINT
39 changes: 39 additions & 0 deletions tests/ci_build/Dockerfile.rmm
@@ -0,0 +1,39 @@
ARG CUDA_VERSION
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04

# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell

# Install all basic requirements
RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
# Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr

ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with RMM
RUN \
conda create -n rmm_test -c nvidia -c rapidsai -c conda-forge -c defaults \
python=3.7 rmm=0.14 cudatoolkit=$CUDA_VERSION

ENV GOSU_VERSION 1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
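
Building the image locally only needs the CUDA_VERSION build-arg that the Jenkinsfile passes; the tag below is a hypothetical stand-in, and the build context is assumed to be tests/ci_build so that entrypoint.sh is available to COPY:

    # Build the RMM CI image and smoke-check that the rmm_test environment resolved RMM 0.14.
    docker build -t xgboost-ci-rmm:cuda10.2 --build-arg CUDA_VERSION=10.2 \
        -f tests/ci_build/Dockerfile.rmm tests/ci_build
    docker run --rm --entrypoint "" xgboost-ci-rmm:cuda10.2 conda list -n rmm_test rmm
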
13 changes: 12 additions & 1 deletion tests/ci_build/build_via_cmake.sh
@@ -1,10 +1,21 @@
#!/usr/bin/env bash
set -e

if [[ "$1" == --conda-env=* ]]
then
conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -)
echo "Activating Conda environment ${conda_env}"
shift 1
cmake_args="$@"
source activate ${conda_env}
else
cmake_args="$@"
fi

rm -rf build
mkdir build
cd build
cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
make clean
make -j$(nproc)
cd ..
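
The --conda-env flag must be the first argument; everything after it is forwarded to CMake unchanged, as before. Example invocations from the repository root, mirroring the Jenkinsfile call above:

    # Plain build: all arguments go straight to CMake.
    tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON

    # RMM build: activate the rmm_test Conda environment first, then forward the remaining flags.
    tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON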
