[BLOCKING] [jvm-packages] add gpu_hist and enable gpu scheduling #5171

Merged · 12 commits · Jul 27, 2020
42 changes: 42 additions & 0 deletions Jenkinsfile
@@ -75,6 +75,7 @@ pipeline {
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') },
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
'build-jvm-doc': { BuildJVMDoc() }
])
@@ -94,6 +95,7 @@ pipeline {
'test-python-mgpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2', multi_gpu: true) },
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -282,6 +284,28 @@ def BuildCUDA(args) {
}
}

def BuildJVMPackagesWithCUDA(args) {
node('linux && gpu') {
unstash name: 'srcs'
echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
def container_type = "jvm_gpu_build"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
def arch_flag = ""
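// On PR branches (not master or release), build for a single GPU compute capability (7.5) to keep CI builds fast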
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
arch_flag = "-DGPU_COMPUTE_VER=75"
}
// Use only 4 CPU cores
def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='--cpuset-cpus 0-3'"
sh """
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_jvm_packages.sh ${args.spark_version} -Duse.cuda=ON $arch_flag
"""
echo "Stashing XGBoost4J JAR with CUDA ${args.cuda_version} ..."
stash name: 'xgboost4j_jar_gpu', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar"
deleteDir()
}
}

def BuildJVMPackages(args) {
node('linux && cpu') {
unstash name: 'srcs'
@@ -386,6 +410,24 @@ def TestCppGPU(args) {
}
}

def CrossTestJVMwithJDKGPU(args) {
def nodeReq = 'linux && mgpu'
node(nodeReq) {
unstash name: "xgboost4j_jar_gpu"
unstash name: 'srcs'
if (args.spark_version != null) {
echo "Test XGBoost4J on a machine with JDK ${args.jdk_version}, Spark ${args.spark_version}, CUDA ${args.host_cuda_version}"
} else {
echo "Test XGBoost4J on a machine with JDK ${args.jdk_version}, CUDA ${args.host_cuda_version}"
}
def container_type = "gpu_jvm"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
deleteDir()
}
}

def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
8 changes: 8 additions & 0 deletions doc/jvm/index.rst
@@ -202,6 +202,14 @@ If you are on Mac OS and using a compiler that supports OpenMP, you need to go t

in order to get the benefit of multi-threading.

Building with GPU support
-------------------------
To build XGBoost4J with support for distributed GPU training, run

.. code-block:: bash

mvn -Duse.cuda=ON install
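
For illustration, here is a minimal Scala sketch of how the resulting GPU-enabled XGBoost4J-Spark build might be used on Spark 3.0 with GPU scheduling. The Spark resource keys are standard Spark 3.0 settings, while the application name, discovery-script path, and column names are placeholder assumptions.

.. code-block:: scala

   import org.apache.spark.sql.SparkSession
   import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier

   object GpuTrainingSketch {
     def main(args: Array[String]): Unit = {
       // Ask Spark for one GPU per executor and per task so that each
       // XGBoost worker gets a dedicated GPU (the script path is a placeholder).
       val spark = SparkSession.builder()
         .appName("xgboost4j-gpu-sketch")
         .config("spark.executor.resource.gpu.amount", "1")
         .config("spark.task.resource.gpu.amount", "1")
         .config("spark.executor.resource.gpu.discoveryScript",
           "/opt/spark/getGpusResources.sh")
         .getOrCreate()

       val xgbParam = Map(
         "objective"   -> "multi:softprob",
         "num_class"   -> 3,
         "num_round"   -> 100,
         "num_workers" -> 1,          // one distributed worker per GPU
         "tree_method" -> "gpu_hist"  // the GPU histogram algorithm
       )
       val classifier = new XGBoostClassifier(xgbParam)
         .setFeaturesCol("features")
         .setLabelCol("classIndex")
       // classifier.fit(trainingDF) starts GPU training once a DataFrame with
       // a "features" vector column and a "classIndex" label column is prepared.
     }
   }

Setting num_workers to the number of GPUs requested keeps one training task per GPU, which matches the updated examples in this PR.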

********
Contents
********
16 changes: 15 additions & 1 deletion jvm-packages/create_jni.py
@@ -1,13 +1,13 @@
#!/usr/bin/env python
import errno
import argparse
import glob
import os
import shutil
import subprocess
import sys
from contextlib import contextmanager


# Monkey-patch the API inconsistency between Python2.X and 3.X.
if sys.platform.startswith("linux"):
sys.platform = "linux"
@@ -20,6 +20,7 @@
"USE_S3": "OFF",

"USE_CUDA": "OFF",
"USE_NCCL": "OFF",
"JVM_BINDINGS": "ON"
}

@@ -68,6 +69,10 @@ def normpath(path):


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
cli_args = parser.parse_args()

if sys.platform == "darwin":
# Enable if your compiler supports OpenMP.
CONFIG["USE_OPENMP"] = "OFF"
@@ -88,12 +93,21 @@ def normpath(path):
else:
maybe_parallel_build = ""

if cli_args.use_cuda == 'ON':
CONFIG['USE_CUDA'] = 'ON'
CONFIG['USE_NCCL'] = 'ON'

args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]

# if the RABIT_MOCK environment variable is set
if os.getenv("RABIT_MOCK", None) is not None:
args.append("-DRABIT_MOCK:BOOL=ON")

# if the GPU_ARCH_FLAG environment variable is set
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)

run("cmake .. " + " ".join(args) + maybe_generator)
run("cmake --build . --config Release" + maybe_parallel_build)

87 changes: 78 additions & 9 deletions jvm-packages/pom.xml
@@ -38,6 +38,7 @@
<scala.version>2.12.8</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<hadoop.version>2.7.3</hadoop.version>
<use.cuda>OFF</use.cuda>
</properties>
<repositories>
<repository>
@@ -52,7 +53,65 @@
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
</modules>

<profiles>
<profile>
<!-- default active profile excluding gpu related test suites -->
<id>default</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<tagsToExclude>ml.dmlc.xgboost4j.java.GpuTestSuite</tagsToExclude>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<!-- gpu profile with both cpu and gpu test suites -->
<profile>
<id>gpu</id>
<activation>
<property>
<name>use.cuda</name>
<value>ON</value>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>

<!-- gpu-with-gpu-tests profile with only gpu test suites -->
<profile>
<id>gpu-with-gpu-tests</id>
CodingCat (Member) commented on Jul 24, 2020:

Do we really need two profiles for GPU? Maven does support running a particular test suite.

Contributor Author replied:

Hi @CodingCat, thanks for the review.

I take "Maven does support running a particular suite" to mean something like

 mvn test -Dsuites=ml.dmlc.xgboost4j.scala.DMatrixSuite

Maven can indeed run a specific suite this way, but the suite has to be named by its fully qualified class name. So whenever a PR adds another GPU-related suite, the developer would also have to update the CI build script to list it.

The gpu-with-gpu-tests profile is used only by CI, and it picks up any suite tagged with GpuTestSuite. A developer only has to add the GpuTestSuite tag to a new GPU suite (see the sketch after this pom.xml diff); the CI scripts do not need to change.

<properties>
<use.cuda>ON</use.cuda>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<tagsToInclude>ml.dmlc.xgboost4j.java.GpuTestSuite</tagsToInclude>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>release</id>
<build>
@@ -242,6 +301,25 @@
<filtering>true</filtering>
</resource>
</resources>

<pluginManagement>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>

<plugins>
<plugin>
<groupId>org.scalastyle</groupId>
@@ -336,15 +414,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
<extensions>
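To illustrate the tagging approach discussed in the review thread above, here is a hypothetical sketch of a GPU-only suite. The annotation's fully qualified name matches the tagsToInclude/tagsToExclude value used in the profiles; the suite name, the test body, and the class-level placement of the tag are assumptions made for illustration.

```scala
import org.scalatest.FunSuite
import ml.dmlc.xgboost4j.java.GpuTestSuite

// Hypothetical suite: the GpuTestSuite tag is what the gpu-with-gpu-tests
// profile includes and the default profile excludes.
@GpuTestSuite
class GpuHistSketchSuite extends FunSuite {
  test("gpu_hist tree method is accepted") {
    // A real suite would train a small model with tree_method = "gpu_hist"
    // and assert on the result; that needs a GPU, so it is omitted here.
    assert(true)
  }
}
```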
SparkMLlibPipeline.scala
@@ -31,15 +31,20 @@ object SparkMLlibPipeline {

def main(args: Array[String]): Unit = {

if (args.length != 3) {
println("Usage: SparkMLlibPipeline input_path native_model_path pipeline_model_path")
if (args.length != 3 && args.length != 4) {
println("Usage: SparkMLlibPipeline input_path native_model_path pipeline_model_path " +
"[cpu|gpu]")
sys.exit(1)
}

val inputPath = args(0)
val nativeModelPath = args(1)
val pipelineModelPath = args(2)

val (treeMethod, numWorkers) = if (args.length == 4 && args(3) == "gpu") {
("gpu_hist", 1)
} else ("auto", 2)

val spark = SparkSession
.builder()
.appName("XGBoost4J-Spark Pipeline Example")
@@ -76,7 +81,8 @@
"objective" -> "multi:softprob",
"num_class" -> 3,
"num_round" -> 100,
"num_workers" -> 2
"num_workers" -> numWorkers,
"tree_method" -> treeMethod
)
)
booster.setFeaturesCol("features")
SparkTraining.scala
@@ -28,9 +28,14 @@ object SparkTraining {
def main(args: Array[String]): Unit = {
if (args.length < 1) {
// scalastyle:off
println("Usage: program input_path")
println("Usage: program input_path [cpu|gpu]")
sys.exit(1)
}

val (treeMethod, numWorkers) = if (args.length == 2 && args(1) == "gpu") {
("gpu_hist", 1)
} else ("auto", 2)

val spark = SparkSession.builder().getOrCreate()
val inputPath = args(0)
val schema = new StructType(Array(
@@ -68,7 +73,8 @@
"objective" -> "multi:softprob",
"num_class" -> 3,
"num_round" -> 100,
"num_workers" -> 2,
"num_workers" -> numWorkers,
"tree_method" -> treeMethod,
"eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2))
val xgbClassifier = new XGBoostClassifier(xgbParam).
setFeaturesCol("features").